diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 21:32:52 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 21:32:52 +0000 |
commit | 1189dbaa8c30085bd4f9cc7759a459160bcf2375 (patch) | |
tree | 72cc0027ca93808037bb0e9bf19172c6021053e9 | |
parent | 442906470441699a0cffb2c475ee2fa6e6e57515 (diff) | |
parent | d93e1dfac8711cfed1a9d9cd1876a788b83945cd (diff) | |
download | src-1189dbaa8c30085bd4f9cc7759a459160bcf2375.tar.gz src-1189dbaa8c30085bd4f9cc7759a459160bcf2375.zip |
Update lld to trunk r290819 and resolve conflicts.
Notes
Notes:
svn path=/projects/clang400-import/; revision=311144
130 files changed, 14039 insertions, 7757 deletions
diff --git a/contrib/llvm/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/CMakeLists.txt index 46ca748f8fac..23cef2e9fc67 100644 --- a/contrib/llvm/tools/lld/CMakeLists.txt +++ b/contrib/llvm/tools/lld/CMakeLists.txt @@ -1,3 +1,54 @@ +# Check if lld is built as a standalone project. +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + project(lld) + cmake_minimum_required(VERSION 3.4.3) + + set(CMAKE_INCLUDE_CURRENT_DIR ON) + set(LLD_BUILT_STANDALONE TRUE) + + find_program(LLVM_CONFIG_PATH "llvm-config" DOC "Path to llvm-config binary") + if(NOT LLVM_CONFIG_PATH) + message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH") + endif() + + execute_process(COMMAND "${LLVM_CONFIG_PATH}" "--obj-root" "--includedir" + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LLVM_CONFIG_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(HAD_ERROR) + message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") + endif() + + string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" LLVM_CONFIG_OUTPUT "${LLVM_CONFIG_OUTPUT}") + + list(GET LLVM_CONFIG_OUTPUT 0 OBJ_ROOT) + list(GET LLVM_CONFIG_OUTPUT 1 MAIN_INCLUDE_DIR) + + set(LLVM_OBJ_ROOT ${OBJ_ROOT} CACHE PATH "path to LLVM build tree") + set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "path to llvm/include") + + file(TO_CMAKE_PATH ${LLVM_OBJ_ROOT} LLVM_BINARY_DIR) + set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") + + if(NOT EXISTS "${LLVM_CMAKE_PATH}/LLVMConfig.cmake") + message(FATAL_ERROR "LLVMConfig.cmake not found") + endif() + include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake") + + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") + + set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") + include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS}) + link_directories(${LLVM_LIBRARY_DIRS}) + + set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) + find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + + include(AddLLVM) + 
include(TableGen) + include(HandleLLVMOptions) +endif() + set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LLD_INCLUDE_DIR ${LLD_SOURCE_DIR}/include ) set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -55,6 +106,8 @@ endif() list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") +include(AddLLD) + option(LLD_USE_VTUNE "Enable VTune user task tracking." OFF) @@ -67,6 +120,8 @@ if (LLD_USE_VTUNE) endif() endif() +option(LLD_BUILD_TOOLS + "Build the lld tools. If OFF, just generate build targets." ON) if (MSVC) add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' @@ -87,12 +142,6 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) ) endif() -macro(add_lld_library name) - add_llvm_library(${name} ${ARGN}) - set_target_properties(${name} PROPERTIES FOLDER "lld libraries") -endmacro(add_lld_library) - - add_subdirectory(lib) add_subdirectory(tools/lld) diff --git a/contrib/llvm/tools/lld/COFF/CMakeLists.txt b/contrib/llvm/tools/lld/COFF/CMakeLists.txt index ad5b6fda1693..70a33b9fdd81 100644 --- a/contrib/llvm/tools/lld/COFF/CMakeLists.txt +++ b/contrib/llvm/tools/lld/COFF/CMakeLists.txt @@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(COFFOptionsTableGen) +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + add_lld_library(lldCOFF Chunks.cpp DLL.cpp @@ -14,6 +18,7 @@ add_lld_library(lldCOFF MarkLive.cpp ModuleDef.cpp PDB.cpp + Strings.cpp SymbolTable.cpp Symbols.cpp Writer.cpp @@ -21,6 +26,9 @@ add_lld_library(lldCOFF LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Core + DebugInfoCodeView + DebugInfoMSF + DebugInfoPDB LTO LibDriver Object @@ -30,7 +38,11 @@ add_lld_library(lldCOFF Option Support - LINK_LIBS ${PTHREAD_LIB} - ) + LINK_LIBS + lldCore + ${PTHREAD_LIB} -add_dependencies(lldCOFF COFFOptionsTableGen intrinsics_gen) + DEPENDS + COFFOptionsTableGen + ${tablegen_deps} + ) diff --git 
a/contrib/llvm/tools/lld/COFF/Chunks.cpp b/contrib/llvm/tools/lld/COFF/Chunks.cpp index 1c1b18176aa2..7f0dfa92ec10 100644 --- a/contrib/llvm/tools/lld/COFF/Chunks.cpp +++ b/contrib/llvm/tools/lld/COFF/Chunks.cpp @@ -28,7 +28,7 @@ namespace lld { namespace coff { SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H) - : Chunk(SectionKind), Repl(this), File(F), Header(H), + : Chunk(SectionKind), Repl(this), Header(H), File(F), Relocs(File->getCOFFObj()->getRelocations(Header)), NumRelocs(std::distance(Relocs.begin(), Relocs.end())) { // Initialize SectionName. @@ -81,11 +81,23 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, } static void applyMOV(uint8_t *Off, uint16_t V) { - or16(Off, ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); - or16(Off + 2, ((V & 0x700) << 4) | (V & 0xff)); + write16le(Off, (read16le(Off) & 0xfbf0) | ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); + write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff)); +} + +static uint16_t readMOV(uint8_t *Off) { + uint16_t Opcode1 = read16le(Off); + uint16_t Opcode2 = read16le(Off + 2); + uint16_t Imm = (Opcode2 & 0x00ff) | ((Opcode2 >> 4) & 0x0700); + Imm |= ((Opcode1 << 1) & 0x0800) | ((Opcode1 & 0x000f) << 12); + return Imm; } static void applyMOV32T(uint8_t *Off, uint32_t V) { + uint16_t ImmW = readMOV(Off); // read MOVW operand + uint16_t ImmT = readMOV(Off + 4); // read MOVT operand + uint32_t Imm = ImmW | (ImmT << 16); + V += Imm; // add the immediate offset applyMOV(Off, V); // set MOVW operand applyMOV(Off + 4, V >> 16); // set MOVT operand } @@ -99,11 +111,14 @@ static void applyBranch20T(uint8_t *Off, int32_t V) { } static void applyBranch24T(uint8_t *Off, int32_t V) { + if (!isInt<25>(V)) + fatal("relocation out of range"); uint32_t S = V < 0 ? 
1 : 0; uint32_t J1 = ((~V >> 23) & 1) ^ S; uint32_t J2 = ((~V >> 22) & 1) ^ S; or16(Off, (S << 10) | ((V >> 12) & 0x3ff)); - or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); + // Clear out the J1 and J2 bits which may be set. + write16le(Off + 2, (read16le(Off + 2) & 0xd000) | (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); } void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, @@ -119,6 +134,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, S - P - 4); break; case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break; case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break; + case IMAGE_REL_ARM_SECREL: add32(Off, Sym->getSecrel()); break; default: fatal("unsupported relocation type"); } @@ -134,7 +150,7 @@ void SectionChunk::writeTo(uint8_t *Buf) const { // Apply relocations. for (const coff_relocation &Rel : Relocs) { uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); Defined *Sym = cast<Defined>(Body); uint64_t P = RVA + Rel.VirtualAddress; switch (Config->Machine) { @@ -187,7 +203,7 @@ void SectionChunk::getBaserels(std::vector<Baserel> *Res) { uint8_t Ty = getBaserelType(Rel); if (Ty == IMAGE_REL_BASED_ABSOLUTE) continue; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); if (isa<DefinedAbsolute>(Body)) continue; Res->emplace_back(RVA + Rel.VirtualAddress, Ty); @@ -210,7 +226,7 @@ void SectionChunk::printDiscardedMessage() const { // Removed by dead-stripping. If it's removed by ICF, ICF already // printed out the name, so don't repeat that here. 
if (Sym && this == Repl) - llvm::outs() << "Discarded " << Sym->getName() << "\n"; + outs() << "Discarded " << Sym->getName() << "\n"; } StringRef SectionChunk::getDebugName() { @@ -233,7 +249,7 @@ void SectionChunk::replace(SectionChunk *Other) { CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { // Common symbols are aligned on natural boundaries up to 32 bytes. // This is what MSVC link.exe does. - Align = std::min(uint64_t(32), NextPowerOf2(Sym.getValue())); + Align = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue())); } uint32_t CommonChunk::getPermissions() const { diff --git a/contrib/llvm/tools/lld/COFF/Chunks.h b/contrib/llvm/tools/lld/COFF/Chunks.h index cd0e2e69ef5d..59e36b84c9b0 100644 --- a/contrib/llvm/tools/lld/COFF/Chunks.h +++ b/contrib/llvm/tools/lld/COFF/Chunks.h @@ -17,7 +17,6 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Object/COFF.h" -#include <atomic> #include <utility> #include <vector> @@ -29,7 +28,6 @@ using llvm::object::COFFSymbolRef; using llvm::object::SectionRef; using llvm::object::coff_relocation; using llvm::object::coff_section; -using llvm::sys::fs::file_magic; class Baserel; class Defined; @@ -187,11 +185,12 @@ public: // Auxiliary Format 5: Section Definitions. Used for ICF. uint32_t Checksum = 0; + const coff_section *Header; + private: // A file this chunk was created from. ObjectFile *File; - const coff_section *Header; StringRef SectionName; std::vector<SectionChunk *> AssocChildren; llvm::iterator_range<const coff_relocation *> Relocs; @@ -202,7 +201,7 @@ private: // Used for ICF (Identical COMDAT Folding) void replace(SectionChunk *Other); - std::atomic<uint64_t> GroupID = { 0 }; + uint32_t Color[2] = {0, 0}; // Sym points to a section symbol if this is a COMDAT chunk. 
DefinedRegular *Sym = nullptr; diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h index a5472e937fa1..0fa3338aa28c 100644 --- a/contrib/llvm/tools/lld/COFF/Config.h +++ b/contrib/llvm/tools/lld/COFF/Config.h @@ -26,7 +26,8 @@ using llvm::StringRef; class DefinedAbsolute; class DefinedRelative; class StringChunk; -class Undefined; +struct Symbol; +class SymbolBody; // Short aliases. static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; @@ -37,7 +38,7 @@ static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386; struct Export { StringRef Name; // N in /export:N or /export:E=N StringRef ExtName; // E in /export:E=N - Undefined *Sym = nullptr; + SymbolBody *Sym = nullptr; uint16_t Ordinal = 0; bool Noname = false; bool Data = false; @@ -61,6 +62,13 @@ struct Export { } }; +enum class DebugType { + None = 0x0, + CV = 0x1, /// CodeView + PData = 0x2, /// Procedure Data + Fixup = 0x4, /// Relocation Table +}; + // Global configuration. struct Configuration { enum ManifestKind { SideBySide, Embed, No }; @@ -69,7 +77,7 @@ struct Configuration { llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; bool Verbose = false; WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; - Undefined *Entry = nullptr; + SymbolBody *Entry = nullptr; bool NoEntry = false; std::string OutputFile; bool DoGC = true; @@ -78,9 +86,11 @@ struct Configuration { bool Force = false; bool Debug = false; bool WriteSymtab = true; + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + StringRef PDBPath; // Symbols in this set are considered as live by the garbage collector. 
- std::set<Undefined *> GCRoot; + std::set<SymbolBody *> GCRoot; std::set<StringRef> NoDefaultLibs; bool NoDefaultLibAll = false; @@ -91,11 +101,11 @@ struct Configuration { std::vector<Export> Exports; std::set<std::string> DelayLoads; std::map<std::string, int> DLLOrder; - Undefined *DelayLoadHelper = nullptr; + SymbolBody *DelayLoadHelper = nullptr; // Used for SafeSEH. - DefinedRelative *SEHTable = nullptr; - DefinedAbsolute *SEHCount = nullptr; + Symbol *SEHTable = nullptr; + Symbol *SEHCount = nullptr; // Used for /opt:lldlto=N unsigned LTOOptLevel = 2; @@ -141,6 +151,10 @@ struct Configuration { bool TerminalServerAware = true; bool LargeAddressAware = false; bool HighEntropyVA = false; + + // This is for debugging. + bool DebugPdb = false; + bool DumpPdb = false; }; extern Configuration *Config; diff --git a/contrib/llvm/tools/lld/COFF/DLL.cpp b/contrib/llvm/tools/lld/COFF/DLL.cpp index 9ac370c11d59..f93dc5cde44c 100644 --- a/contrib/llvm/tools/lld/COFF/DLL.cpp +++ b/contrib/llvm/tools/lld/COFF/DLL.cpp @@ -324,7 +324,7 @@ public: if (E.ForwardChunk) { write32le(P, E.ForwardChunk->getRVA()); } else { - write32le(P, cast<Defined>(E.Sym->repl())->getRVA()); + write32le(P, cast<Defined>(E.Sym)->getRVA()); } } } diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp index bb6a60e4fc4c..dc3a00ba55ed 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.cpp +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "Config.h" #include "Driver.h" +#include "Config.h" #include "Error.h" #include "InputFiles.h" +#include "Memory.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" #include "lld/Driver/Driver.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/LibDriver/LibDriver.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" @@ -28,6 +30,13 @@ #include <algorithm> #include 
<memory> +#ifdef _MSC_VER +// <future> depends on <eh.h> for __uncaught_exception. +#include <eh.h> +#endif + +#include <future> + using namespace llvm; using namespace llvm::COFF; using llvm::sys::Process; @@ -41,11 +50,13 @@ namespace coff { Configuration *Config; LinkerDriver *Driver; -bool link(llvm::ArrayRef<const char *> Args) { - Configuration C; - LinkerDriver D; - Config = &C; - Driver = &D; +BumpPtrAllocator BAlloc; +StringSaver Saver{BAlloc}; +std::vector<SpecificAllocBase *> SpecificAllocBase::Instances; + +bool link(ArrayRef<const char *> Args) { + Config = make<Configuration>(); + Driver = make<LinkerDriver>(); Driver->link(Args); return true; } @@ -58,26 +69,123 @@ static std::string getOutputPath(StringRef Path) { return (S.substr(0, S.rfind('.')) + E).str(); } -// Opens a file. Path has to be resolved already. -// Newly created memory buffers are owned by this driver. -MemoryBufferRef LinkerDriver::openFile(StringRef Path) { - std::unique_ptr<MemoryBuffer> MB = - check(MemoryBuffer::getFile(Path), "could not open " + Path); - MemoryBufferRef MBRef = MB->getMemBufferRef(); - OwningMBs.push_back(std::move(MB)); // take ownership +// ErrorOr is not default constructible, so it cannot be used as the type +// parameter of a future. +// FIXME: We could open the file in createFutureForFile and avoid needing to +// return an error here, but for the moment that would cost us a file descriptor +// (a limited resource on Windows) for the duration that the future is pending. +typedef std::pair<std::unique_ptr<MemoryBuffer>, std::error_code> MBErrPair; + +// Create a std::future that opens and maps a file using the best strategy for +// the host platform. +static std::future<MBErrPair> createFutureForFile(std::string Path) { +#if LLVM_ON_WIN32 + // On Windows, file I/O is relatively slow so it is best to do this + // asynchronously. 
+ auto Strategy = std::launch::async; +#else + auto Strategy = std::launch::deferred; +#endif + return std::async(Strategy, [=]() { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (!MBOrErr) + return MBErrPair{nullptr, MBOrErr.getError()}; + return MBErrPair{std::move(*MBOrErr), std::error_code()}; + }); +} + +MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) { + MemoryBufferRef MBRef = *MB; + OwningMBs.push_back(std::move(MB)); + + if (Driver->Cpio) + Driver->Cpio->append(relativeToRoot(MBRef.getBufferIdentifier()), + MBRef.getBuffer()); + return MBRef; } -static std::unique_ptr<InputFile> createFile(MemoryBufferRef MB) { +void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB) { + MemoryBufferRef MBRef = takeBuffer(std::move(MB)); + // File type is detected by contents, not by file extension. - file_magic Magic = identify_magic(MB.getBuffer()); + file_magic Magic = identify_magic(MBRef.getBuffer()); + if (Magic == file_magic::windows_resource) { + Resources.push_back(MBRef); + return; + } + + FilePaths.push_back(MBRef.getBufferIdentifier()); if (Magic == file_magic::archive) - return std::unique_ptr<InputFile>(new ArchiveFile(MB)); + return Symtab.addFile(make<ArchiveFile>(MBRef)); if (Magic == file_magic::bitcode) - return std::unique_ptr<InputFile>(new BitcodeFile(MB)); + return Symtab.addFile(make<BitcodeFile>(MBRef)); + if (Magic == file_magic::coff_cl_gl_object) + fatal(MBRef.getBufferIdentifier() + ": is not a native COFF file. 
" + "Recompile without /GL"); + Symtab.addFile(make<ObjectFile>(MBRef)); +} + +void LinkerDriver::enqueuePath(StringRef Path) { + auto Future = + std::make_shared<std::future<MBErrPair>>(createFutureForFile(Path)); + std::string PathStr = Path; + enqueueTask([=]() { + auto MBOrErr = Future->get(); + if (MBOrErr.second) + fatal(MBOrErr.second, "could not open " + PathStr); + Driver->addBuffer(std::move(MBOrErr.first)); + }); + if (Config->OutputFile == "") - Config->OutputFile = getOutputPath(MB.getBufferIdentifier()); - return std::unique_ptr<InputFile>(new ObjectFile(MB)); + Config->OutputFile = getOutputPath(Path); +} + +void LinkerDriver::addArchiveBuffer(MemoryBufferRef MB, StringRef SymName, + StringRef ParentName) { + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::coff_import_library) { + Symtab.addFile(make<ImportFile>(MB)); + return; + } + + InputFile *Obj; + if (Magic == file_magic::coff_object) + Obj = make<ObjectFile>(MB); + else if (Magic == file_magic::bitcode) + Obj = make<BitcodeFile>(MB); + else + fatal("unknown file type: " + MB.getBufferIdentifier()); + + Obj->ParentName = ParentName; + Symtab.addFile(Obj); + if (Config->Verbose) + outs() << "Loaded " << toString(Obj) << " for " << SymName << "\n"; +} + +void LinkerDriver::enqueueArchiveMember(const Archive::Child &C, + StringRef SymName, + StringRef ParentName) { + if (!C.getParent()->isThin()) { + MemoryBufferRef MB = check( + C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + SymName); + enqueueTask([=]() { Driver->addArchiveBuffer(MB, SymName, ParentName); }); + return; + } + + auto Future = std::make_shared<std::future<MBErrPair>>(createFutureForFile( + check(C.getFullName(), + "could not get the filename for the member defining symbol " + + SymName))); + enqueueTask([=]() { + auto MBOrErr = Future->get(); + if (MBOrErr.second) + fatal(MBOrErr.second, + "could not get the buffer for the member defining " + SymName); + 
Driver->addArchiveBuffer(takeBuffer(std::move(MBOrErr.first)), SymName, + ParentName); + }); } static bool isDecorated(StringRef Sym) { @@ -87,7 +195,7 @@ static bool isDecorated(StringRef Sym) { // Parses .drectve section contents and returns a list of files // specified by /defaultlib. void LinkerDriver::parseDirectives(StringRef S) { - llvm::opt::InputArgList Args = Parser.parse(S); + opt::InputArgList Args = Parser.parse(S); for (auto *Arg : Args) { switch (Arg->getOption().getID()) { @@ -95,10 +203,8 @@ void LinkerDriver::parseDirectives(StringRef S) { parseAlternateName(Arg->getValue()); break; case OPT_defaultlib: - if (Optional<StringRef> Path = findLib(Arg->getValue())) { - MemoryBufferRef MB = openFile(*Path); - Symtab.addFile(createFile(MB)); - } + if (Optional<StringRef> Path = findLib(Arg->getValue())) + enqueuePath(*Path); break; case OPT_export: { Export E = parseExport(Arg->getValue()); @@ -135,19 +241,19 @@ void LinkerDriver::parseDirectives(StringRef S) { // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. 
StringRef LinkerDriver::doFindFile(StringRef Filename) { - bool hasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); - if (hasPathSep) + bool HasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); + if (HasPathSep) return Filename; - bool hasExt = (Filename.find('.') != StringRef::npos); + bool HasExt = (Filename.find('.') != StringRef::npos); for (StringRef Dir : SearchPaths) { SmallString<128> Path = Dir; - llvm::sys::path::append(Path, Filename); - if (llvm::sys::fs::exists(Path.str())) - return Alloc.save(Path.str()); - if (!hasExt) { + sys::path::append(Path, Filename); + if (sys::fs::exists(Path.str())) + return Saver.save(Path.str()); + if (!HasExt) { Path.append(".obj"); - if (llvm::sys::fs::exists(Path.str())) - return Alloc.save(Path.str()); + if (sys::fs::exists(Path.str())) + return Saver.save(Path.str()); } } return Filename; @@ -166,9 +272,9 @@ Optional<StringRef> LinkerDriver::findFile(StringRef Filename) { // Find library file from search path. StringRef LinkerDriver::doFindLib(StringRef Filename) { // Add ".lib" to Filename if that has no file extension. 
- bool hasExt = (Filename.find('.') != StringRef::npos); - if (!hasExt) - Filename = Alloc.save(Filename + ".lib"); + bool HasExt = (Filename.find('.') != StringRef::npos); + if (!HasExt) + Filename = Saver.save(Filename + ".lib"); return doFindFile(Filename); } @@ -178,11 +284,12 @@ StringRef LinkerDriver::doFindLib(StringRef Filename) { Optional<StringRef> LinkerDriver::findLib(StringRef Filename) { if (Config->NoDefaultLibAll) return None; + if (!VisitedLibs.insert(Filename.lower()).second) + return None; StringRef Path = doFindLib(Filename); if (Config->NoDefaultLibs.count(Path)) return None; - bool Seen = !VisitedFiles.insert(Path.lower()).second; - if (Seen) + if (!VisitedFiles.insert(Path.lower()).second) return None; return Path; } @@ -192,7 +299,7 @@ void LinkerDriver::addLibSearchPaths() { Optional<std::string> EnvOpt = Process::GetEnv("LIB"); if (!EnvOpt.hasValue()) return; - StringRef Env = Alloc.save(*EnvOpt); + StringRef Env = Saver.save(*EnvOpt); while (!Env.empty()) { StringRef Path; std::tie(Path, Env) = Env.split(';'); @@ -200,17 +307,17 @@ void LinkerDriver::addLibSearchPaths() { } } -Undefined *LinkerDriver::addUndefined(StringRef Name) { - Undefined *U = Symtab.addUndefined(Name); - Config->GCRoot.insert(U); - return U; +SymbolBody *LinkerDriver::addUndefined(StringRef Name) { + SymbolBody *B = Symtab.addUndefined(Name); + Config->GCRoot.insert(B); + return B; } // Symbol names are mangled by appending "_" prefix on x86. 
StringRef LinkerDriver::mangle(StringRef Sym) { assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); if (Config->Machine == I386) - return Alloc.save("_" + Sym); + return Saver.save("_" + Sym); return Sym; } @@ -225,7 +332,7 @@ StringRef LinkerDriver::findDefaultEntry() { }; for (auto E : Entries) { StringRef Entry = Symtab.findMangle(mangle(E[0])); - if (!Entry.empty() && !isa<Undefined>(Symtab.find(Entry)->Body)) + if (!Entry.empty() && !isa<Undefined>(Symtab.find(Entry)->body())) return mangle(E[1]); } return ""; @@ -247,7 +354,83 @@ static uint64_t getDefaultImageBase() { return Config->DLL ? 0x10000000 : 0x400000; } -void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { +static std::string createResponseFile(const opt::InputArgList &Args, + ArrayRef<StringRef> FilePaths, + ArrayRef<StringRef> SearchPaths) { + SmallString<0> Data; + raw_svector_ostream OS(Data); + + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_linkrepro: + case OPT_INPUT: + case OPT_defaultlib: + case OPT_libpath: + break; + default: + OS << stringize(Arg) << "\n"; + } + } + + for (StringRef Path : SearchPaths) { + std::string RelPath = relativeToRoot(Path); + OS << "/libpath:" << quote(RelPath) << "\n"; + } + + for (StringRef Path : FilePaths) + OS << quote(relativeToRoot(Path)) << "\n"; + + return Data.str(); +} + +static unsigned getDefaultDebugType(const opt::InputArgList &Args) { + unsigned DebugTypes = static_cast<unsigned>(DebugType::CV); + if (Args.hasArg(OPT_driver)) + DebugTypes |= static_cast<unsigned>(DebugType::PData); + if (Args.hasArg(OPT_profile)) + DebugTypes |= static_cast<unsigned>(DebugType::Fixup); + return DebugTypes; +} + +static unsigned parseDebugType(StringRef Arg) { + SmallVector<StringRef, 3> Types; + Arg.split(Types, ',', /*KeepEmpty=*/false); + + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + for (StringRef Type : Types) + DebugTypes |= StringSwitch<unsigned>(Type.lower()) + .Case("cv", 
static_cast<unsigned>(DebugType::CV)) + .Case("pdata", static_cast<unsigned>(DebugType::PData)) + .Case("fixup", static_cast<unsigned>(DebugType::Fixup)); + return DebugTypes; +} + +static std::string getMapFile(const opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_lldmap, OPT_lldmap_file); + if (!Arg) + return ""; + if (Arg->getOption().getID() == OPT_lldmap_file) + return Arg->getValue(); + + assert(Arg->getOption().getID() == OPT_lldmap); + StringRef OutFile = Config->OutputFile; + return (OutFile.substr(0, OutFile.rfind('.')) + ".map").str(); +} + +void LinkerDriver::enqueueTask(std::function<void()> Task) { + TaskQueue.push_back(std::move(Task)); +} + +bool LinkerDriver::run() { + bool DidWork = !TaskQueue.empty(); + while (!TaskQueue.empty()) { + TaskQueue.front()(); + TaskQueue.pop_front(); + } + return DidWork; +} + +void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) { @@ -257,15 +440,15 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { } // Needed for LTO. - llvm::InitializeAllTargetInfos(); - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmParsers(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllDisassemblers(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); // Parse command line options. 
- llvm::opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); + opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); // Handle /help if (Args.hasArg(OPT_help)) { @@ -273,6 +456,17 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { return; } + if (auto *Arg = Args.getLastArg(OPT_linkrepro)) { + SmallString<64> Path = StringRef(Arg->getValue()); + sys::path::append(Path, "repro"); + ErrorOr<CpioFile *> F = CpioFile::create(Path); + if (F) + Cpio.reset(*F); + else + errs() << "/linkrepro: failed to open " << Path + << ".cpio: " << F.getError().message() << '\n'; + } + if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) fatal("no input files"); @@ -295,8 +489,17 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Config->Force = true; // Handle /debug - if (Args.hasArg(OPT_debug)) + if (Args.hasArg(OPT_debug)) { Config->Debug = true; + Config->DebugTypes = + Args.hasArg(OPT_debugtype) + ? parseDebugType(Args.getLastArg(OPT_debugtype)->getValue()) + : getDefaultDebugType(Args); + } + + // Create a dummy PDB file to satisfy build sytem rules. + if (auto *Arg = Args.getLastArg(OPT_pdb)) + Config->PDBPath = Arg->getValue(); // Handle /noentry if (Args.hasArg(OPT_noentry)) { @@ -447,72 +650,43 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Config->TerminalServerAware = false; if (Args.hasArg(OPT_nosymtab)) Config->WriteSymtab = false; + Config->DumpPdb = Args.hasArg(OPT_dumppdb); + Config->DebugPdb = Args.hasArg(OPT_debugpdb); // Create a list of input files. Files can be given as arguments // for /defaultlib option. 
- std::vector<StringRef> Paths; std::vector<MemoryBufferRef> MBs; for (auto *Arg : Args.filtered(OPT_INPUT)) if (Optional<StringRef> Path = findFile(Arg->getValue())) - Paths.push_back(*Path); + enqueuePath(*Path); for (auto *Arg : Args.filtered(OPT_defaultlib)) if (Optional<StringRef> Path = findLib(Arg->getValue())) - Paths.push_back(*Path); - for (StringRef Path : Paths) - MBs.push_back(openFile(Path)); + enqueuePath(*Path); // Windows specific -- Create a resource file containing a manifest file. - if (Config->Manifest == Configuration::Embed) { - std::unique_ptr<MemoryBuffer> MB = createManifestRes(); - MBs.push_back(MB->getMemBufferRef()); - OwningMBs.push_back(std::move(MB)); // take ownership - } + if (Config->Manifest == Configuration::Embed) + addBuffer(createManifestRes()); - // Windows specific -- Input files can be Windows resource files (.res files). - // We invoke cvtres.exe to convert resource files to a regular COFF file - // then link the result file normally. - std::vector<MemoryBufferRef> Resources; - auto NotResource = [](MemoryBufferRef MB) { - return identify_magic(MB.getBuffer()) != file_magic::windows_resource; - }; - auto It = std::stable_partition(MBs.begin(), MBs.end(), NotResource); - if (It != MBs.end()) { - Resources.insert(Resources.end(), It, MBs.end()); - MBs.erase(It, MBs.end()); - } + // Read all input files given via the command line. + run(); - // Read all input files given via the command line. Note that step() - // doesn't read files that are specified by directive sections. - for (MemoryBufferRef MB : MBs) - Symtab.addFile(createFile(MB)); - Symtab.step(); - - // Determine machine type and check if all object files are - // for the same CPU type. Note that this needs to be done before - // any call to mangle(). 
- for (std::unique_ptr<InputFile> &File : Symtab.getFiles()) { - MachineTypes MT = File->getMachineType(); - if (MT == IMAGE_FILE_MACHINE_UNKNOWN) - continue; - if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { - Config->Machine = MT; - continue; - } - if (Config->Machine != MT) - fatal(File->getShortName() + ": machine type " + machineToStr(MT) + - " conflicts with " + machineToStr(Config->Machine)); - } + // We should have inferred a machine type by now from the input files, but if + // not we assume x64. if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { - llvm::errs() << "warning: /machine is not specified. x64 is assumed.\n"; + errs() << "warning: /machine is not specified. x64 is assumed.\n"; Config->Machine = AMD64; } - // Windows specific -- Convert Windows resource files to a COFF file. - if (!Resources.empty()) { - std::unique_ptr<MemoryBuffer> MB = convertResToCOFF(Resources); - Symtab.addFile(createFile(MB->getMemBufferRef())); - OwningMBs.push_back(std::move(MB)); // take ownership - } + // Windows specific -- Input files can be Windows resource files (.res files). + // We invoke cvtres.exe to convert resource files to a regular COFF file + // then link the result file normally. 
+ if (!Resources.empty()) + addBuffer(convertResToCOFF(Resources)); + + if (Cpio) + Cpio->append("response.txt", + createResponseFile(Args, FilePaths, + ArrayRef<StringRef>(SearchPaths).slice(1))); // Handle /largeaddressaware if (Config->is64() || Args.hasArg(OPT_largeaddressaware)) @@ -537,7 +711,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { fatal("entry point must be defined"); Config->Entry = addUndefined(S); if (Config->Verbose) - llvm::outs() << "Entry name inferred: " << S << "\n"; + outs() << "Entry name inferred: " << S << "\n"; } // Handle /export @@ -545,18 +719,19 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Export E = parseExport(Arg->getValue()); if (Config->Machine == I386) { if (!isDecorated(E.Name)) - E.Name = Alloc.save("_" + E.Name); + E.Name = Saver.save("_" + E.Name); if (!E.ExtName.empty() && !isDecorated(E.ExtName)) - E.ExtName = Alloc.save("_" + E.ExtName); + E.ExtName = Saver.save("_" + E.ExtName); } Config->Exports.push_back(E); } // Handle /def if (auto *Arg = Args.getLastArg(OPT_deffile)) { - MemoryBufferRef MB = openFile(Arg->getValue()); // parseModuleDefs mutates Config object. - parseModuleDefs(MB, &Alloc); + parseModuleDefs( + takeBuffer(check(MemoryBuffer::getFile(Arg->getValue()), + Twine("could not open ") + Arg->getValue()))); } // Handle /delayload @@ -585,14 +760,10 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Symtab.addAbsolute(mangle("__guard_fids_count"), 0); Symtab.addAbsolute(mangle("__guard_flags"), 0x100); - // Read as much files as we can from directives sections. - Symtab.run(); - - // Resolve auxiliary symbols until we get a convergence. - // (Trying to resolve a symbol may trigger a Lazy symbol to load a new file. - // A new file may contain a directive section to add new command line options. - // That's why we have to repeat until converge.) 
- for (;;) { + // This code may add new undefined symbols to the link, which may enqueue more + // symbol resolution tasks, so we need to continue executing tasks until we + // converge. + do { // Windows specific -- if entry point is not found, // search for its mangled names. if (Config->Entry) @@ -615,7 +786,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { Symbol *Sym = Symtab.find(From); if (!Sym) continue; - if (auto *U = dyn_cast<Undefined>(Sym->Body)) + if (auto *U = dyn_cast<Undefined>(Sym->body())) if (!U->WeakAlias) U->WeakAlias = Symtab.addUndefined(To); } @@ -623,18 +794,15 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // Windows specific -- if __load_config_used can be resolved, resolve it. if (Symtab.findUnderscore("_load_config_used")) addUndefined(mangle("_load_config_used")); - - if (Symtab.queueEmpty()) - break; - Symtab.run(); - } + } while (run()); // Do LTO by compiling bitcode input files to a set of native COFF files then // link those files. Symtab.addCombinedLTOObjects(); + run(); // Make sure we have resolved all symbols. - Symtab.reportRemainingUndefines(/*Resolve=*/true); + Symtab.reportRemainingUndefines(); // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. @@ -662,10 +830,6 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { if (Config->Manifest == Configuration::SideBySide) createSideBySideManifest(); - // Create a dummy PDB file to satisfy build sytem rules. - if (auto *Arg = Args.getLastArg(OPT_pdb)) - createPDB(Arg->getValue()); - // Identify unreferenced COMDAT sections. if (Config->DoGC) markLive(Symtab.getChunks()); @@ -679,13 +843,15 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { // Create a symbol map file containing symbol VAs and their names // to help debugging. 
- if (auto *Arg = Args.getLastArg(OPT_lldmap)) { + std::string MapFile = getMapFile(Args); + if (!MapFile.empty()) { std::error_code EC; - llvm::raw_fd_ostream Out(Arg->getValue(), EC, OpenFlags::F_Text); + raw_fd_ostream Out(MapFile, EC, OpenFlags::F_Text); if (EC) - fatal(EC, "could not create the symbol map"); + fatal(EC, "could not create the symbol map " + MapFile); Symtab.printMap(Out); } + // Call exit to avoid calling destructors. exit(0); } diff --git a/contrib/llvm/tools/lld/COFF/Driver.h b/contrib/llvm/tools/lld/COFF/Driver.h index 23969ee802fb..e8114640edec 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.h +++ b/contrib/llvm/tools/lld/COFF/Driver.h @@ -13,12 +13,13 @@ #include "Config.h" #include "SymbolTable.h" #include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" -#include "llvm/Support/StringSaver.h" #include <memory> #include <set> #include <vector> @@ -42,7 +43,6 @@ void doICF(const std::vector<Chunk *> &Chunks); class ArgParser { public: - ArgParser() : Alloc(AllocAux) {} // Parses command line options. llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args); @@ -56,25 +56,26 @@ private: std::vector<const char *> tokenize(StringRef S); std::vector<const char *> replaceResponseFiles(std::vector<const char *>); - - llvm::BumpPtrAllocator AllocAux; - llvm::StringSaver Alloc; }; class LinkerDriver { public: - LinkerDriver() : Alloc(AllocAux) {} + LinkerDriver() { coff::Symtab = &Symtab; } void link(llvm::ArrayRef<const char *> Args); // Used by the resolver to parse .drectve section contents. void parseDirectives(StringRef S); + // Used by ArchiveFile to enqueue members. 
+ void enqueueArchiveMember(const Archive::Child &C, StringRef SymName, + StringRef ParentName); + private: - llvm::BumpPtrAllocator AllocAux; - llvm::StringSaver Alloc; ArgParser Parser; SymbolTable Symtab; + std::unique_ptr<CpioFile> Cpio; // for /linkrepro + // Opens a file. Path has to be resolved already. MemoryBufferRef openFile(StringRef Path); @@ -90,8 +91,9 @@ private: // Library search path. The first element is always "" (current directory). std::vector<StringRef> SearchPaths; std::set<std::string> VisitedFiles; + std::set<std::string> VisitedLibs; - Undefined *addUndefined(StringRef Sym); + SymbolBody *addUndefined(StringRef Sym); StringRef mangle(StringRef Sym); // Windows specific -- "main" is not the only main function in Windows. @@ -104,12 +106,26 @@ private: StringRef findDefaultEntry(); WindowsSubsystem inferSubsystem(); + MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB); + void addBuffer(std::unique_ptr<MemoryBuffer> MB); + void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName, + StringRef ParentName); + + void enqueuePath(StringRef Path); + + void enqueueTask(std::function<void()> Task); + bool run(); + // Driver is the owner of all opened files. // InputFiles have MemoryBufferRefs to them. std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; + + std::list<std::function<void()>> TaskQueue; + std::vector<StringRef> FilePaths; + std::vector<MemoryBufferRef> Resources; }; -void parseModuleDefs(MemoryBufferRef MB, llvm::StringSaver *Alloc); +void parseModuleDefs(MemoryBufferRef MB); void writeImportLibrary(); // Functions below this line are defined in DriverUtils.cpp. 
@@ -161,8 +177,6 @@ void checkFailIfMismatch(StringRef Arg); std::unique_ptr<MemoryBuffer> convertResToCOFF(const std::vector<MemoryBufferRef> &MBs); -void createPDB(StringRef Path); - // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, diff --git a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp index 5d7dc2bc65af..14dd004f1c04 100644 --- a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp +++ b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp @@ -16,6 +16,7 @@ #include "Config.h" #include "Driver.h" #include "Error.h" +#include "Memory.h" #include "Symbols.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" @@ -43,29 +44,29 @@ namespace { class Executor { public: explicit Executor(StringRef S) : Saver(Alloc), Prog(Saver.save(S)) {} - void add(StringRef S) { Args.push_back(Saver.save(S)); } - void add(std::string &S) { Args.push_back(Saver.save(S)); } - void add(Twine S) { Args.push_back(Saver.save(S)); } - void add(const char *S) { Args.push_back(Saver.save(S)); } + void add(StringRef S) { Args.push_back(Saver.save(S).data()); } + void add(std::string &S) { Args.push_back(Saver.save(S).data()); } + void add(Twine S) { Args.push_back(Saver.save(S).data()); } + void add(const char *S) { Args.push_back(Saver.save(S).data()); } void run() { - ErrorOr<std::string> ExeOrErr = llvm::sys::findProgramByName(Prog); + ErrorOr<std::string> ExeOrErr = sys::findProgramByName(Prog); if (auto EC = ExeOrErr.getError()) fatal(EC, "unable to find " + Prog + " in PATH: "); - const char *Exe = Saver.save(*ExeOrErr); + const char *Exe = Saver.save(*ExeOrErr).data(); Args.insert(Args.begin(), Exe); Args.push_back(nullptr); - if (llvm::sys::ExecuteAndWait(Args[0], Args.data()) != 0) { + if (sys::ExecuteAndWait(Args[0], Args.data()) != 0) { for (const char *S : Args) if (S) - llvm::errs() << S << " "; + errs() << S << " "; fatal("ExecuteAndWait failed"); } } private: - llvm::BumpPtrAllocator Alloc; - 
llvm::StringSaver Saver; + BumpPtrAllocator Alloc; + StringSaver Saver; StringRef Prog; std::vector<const char *> Args; }; @@ -75,10 +76,8 @@ private: // Returns /machine's value. MachineTypes getMachineType(StringRef S) { MachineTypes MT = StringSwitch<MachineTypes>(S.lower()) - .Case("x64", AMD64) - .Case("amd64", AMD64) - .Case("x86", I386) - .Case("i386", I386) + .Cases("x64", "amd64", AMD64) + .Cases("x86", "i386", I386) .Case("arm", ARMNT) .Default(IMAGE_FILE_MACHINE_UNKNOWN); if (MT != IMAGE_FILE_MACHINE_UNKNOWN) @@ -168,8 +167,8 @@ void parseMerge(StringRef S) { if (!Inserted) { StringRef Existing = Pair.first->second; if (Existing != To) - llvm::errs() << "warning: " << S << ": already merged into " - << Existing << "\n"; + errs() << "warning: " << S << ": already merged into " << Existing + << "\n"; } } @@ -279,18 +278,54 @@ static void quoteAndPrint(raw_ostream &Out, StringRef S) { } } +// An RAII temporary file class that automatically removes a temporary file. +namespace { +class TemporaryFile { +public: + TemporaryFile(StringRef Prefix, StringRef Extn) { + SmallString<128> S; + if (auto EC = sys::fs::createTemporaryFile("lld-" + Prefix, Extn, S)) + fatal(EC, "cannot create a temporary file"); + Path = S.str(); + } + + TemporaryFile(TemporaryFile &&Obj) { + std::swap(Path, Obj.Path); + } + + ~TemporaryFile() { + if (Path.empty()) + return; + if (sys::fs::remove(Path)) + fatal("failed to remove " + Path); + } + + // Returns a memory buffer of this temporary file. + // Note that this function does not leave the file open, + // so it is safe to remove the file immediately after this function + // is called (you cannot remove an opened file on Windows.) + std::unique_ptr<MemoryBuffer> getMemoryBuffer() { + // IsVolatileSize=true forces MemoryBuffer to not use mmap(). 
+ return check(MemoryBuffer::getFile(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false, + /*IsVolatileSize=*/true), + "could not open " + Path); + } + + std::string Path; +}; +} + // Create the default manifest file as a temporary file. -static std::string createDefaultXml() { +TemporaryFile createDefaultXml() { // Create a temporary file. - SmallString<128> Path; - if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path)) - fatal(EC, "cannot create a temporary file"); + TemporaryFile File("defaultxml", "manifest"); // Open the temporary file for writing. std::error_code EC; - llvm::raw_fd_ostream OS(Path, EC, sys::fs::F_Text); + raw_fd_ostream OS(File.Path, EC, sys::fs::F_Text); if (EC) - fatal(EC, "failed to open " + Path); + fatal(EC, "failed to open " + File.Path); // Emit the XML. Note that we do *not* verify that the XML attributes are // syntactically correct. This is intentional for link.exe compatibility. @@ -316,56 +351,48 @@ static std::string createDefaultXml() { } OS << "</assembly>\n"; OS.close(); - return StringRef(Path); + return File; } static std::string readFile(StringRef Path) { std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(Path), "could not open " + Path); - std::unique_ptr<MemoryBuffer> Buf(std::move(MB)); - return Buf->getBuffer(); + return MB->getBuffer(); } static std::string createManifestXml() { // Create the default manifest file. - std::string Path1 = createDefaultXml(); + TemporaryFile File1 = createDefaultXml(); if (Config->ManifestInput.empty()) - return readFile(Path1); + return readFile(File1.Path); // If manifest files are supplied by the user using /MANIFESTINPUT // option, we need to merge them with the default manifest. 
- SmallString<128> Path2; - if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path2)) - fatal(EC, "cannot create a temporary file"); - FileRemover Remover1(Path1); - FileRemover Remover2(Path2); + TemporaryFile File2("user", "manifest"); Executor E("mt.exe"); E.add("/manifest"); - E.add(Path1); + E.add(File1.Path); for (StringRef Filename : Config->ManifestInput) { E.add("/manifest"); E.add(Filename); } E.add("/nologo"); - E.add("/out:" + StringRef(Path2)); + E.add("/out:" + StringRef(File2.Path)); E.run(); - return readFile(Path2); + return readFile(File2.Path); } // Create a resource file containing a manifest XML. std::unique_ptr<MemoryBuffer> createManifestRes() { // Create a temporary file for the resource script file. - SmallString<128> RCPath; - if (auto EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath)) - fatal(EC, "cannot create a temporary file"); - FileRemover RCRemover(RCPath); + TemporaryFile RCFile("manifest", "rc"); // Open the temporary file for writing. std::error_code EC; - llvm::raw_fd_ostream Out(RCPath, EC, sys::fs::F_Text); + raw_fd_ostream Out(RCFile.Path, EC, sys::fs::F_Text); if (EC) - fatal(EC, "failed to open " + RCPath); + fatal(EC, "failed to open " + RCFile.Path); // Write resource script to the RC file. Out << "#define LANG_ENGLISH 9\n" @@ -379,17 +406,15 @@ std::unique_ptr<MemoryBuffer> createManifestRes() { Out.close(); // Create output resource file. 
- SmallString<128> ResPath; - if (auto EC = sys::fs::createTemporaryFile("tmp", "res", ResPath)) - fatal(EC, "cannot create a temporary file"); + TemporaryFile ResFile("output-resource", "res"); Executor E("rc.exe"); E.add("/fo"); - E.add(ResPath.str()); + E.add(ResFile.Path); E.add("/nologo"); - E.add(RCPath.str()); + E.add(RCFile.Path); E.run(); - return check(MemoryBuffer::getFile(ResPath), "could not open " + ResPath); + return ResFile.getMemoryBuffer(); } void createSideBySideManifest() { @@ -397,7 +422,7 @@ void createSideBySideManifest() { if (Path == "") Path = Config->OutputFile + ".manifest"; std::error_code EC; - llvm::raw_fd_ostream Out(Path, EC, llvm::sys::fs::F_Text); + raw_fd_ostream Out(Path, EC, sys::fs::F_Text); if (EC) fatal(EC, "failed to create manifest"); Out << createManifestXml(); @@ -485,12 +510,14 @@ void fixupExports() { } for (Export &E : Config->Exports) { + SymbolBody *Sym = E.Sym; if (!E.ForwardTo.empty()) { E.SymbolName = E.Name; - } else if (Undefined *U = cast_or_null<Undefined>(E.Sym->WeakAlias)) { - E.SymbolName = U->getName(); } else { - E.SymbolName = E.Sym->getName(); + if (auto *U = dyn_cast<Undefined>(Sym)) + if (U->WeakAlias) + Sym = U->WeakAlias; + E.SymbolName = Sym->getName(); } } @@ -515,7 +542,7 @@ void fixupExports() { Export *Existing = Pair.first->second; if (E == *Existing || E.Name != Existing->Name) continue; - llvm::errs() << "warning: duplicate /export option: " << E.Name << "\n"; + errs() << "warning: duplicate /export option: " << E.Name << "\n"; } Config->Exports = std::move(V); @@ -555,20 +582,39 @@ void checkFailIfMismatch(StringRef Arg) { std::unique_ptr<MemoryBuffer> convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) { // Create an output file path. - SmallString<128> Path; - if (auto EC = llvm::sys::fs::createTemporaryFile("resource", "obj", Path)) - fatal(EC, "could not create temporary file"); + TemporaryFile File("resource-file", "obj"); // Execute cvtres.exe. 
Executor E("cvtres.exe"); E.add("/machine:" + machineToStr(Config->Machine)); E.add("/readonly"); E.add("/nologo"); - E.add("/out:" + Path); - for (MemoryBufferRef MB : MBs) - E.add(MB.getBufferIdentifier()); + E.add("/out:" + Twine(File.Path)); + + // We must create new files because the memory buffers we have may have no + // underlying file still existing on the disk. + // It happens if it was created from a TemporaryFile, which usually deletes + // the file just after creating the MemoryBuffer. + std::vector<TemporaryFile> ResFiles; + ResFiles.reserve(MBs.size()); + for (MemoryBufferRef MB : MBs) { + // We store the temporary file in a vector to avoid deletion + // before running cvtres + ResFiles.emplace_back("resource-file", "res"); + TemporaryFile& ResFile = ResFiles.back(); + // Write the content of the resource in a temporary file + std::error_code EC; + raw_fd_ostream OS(ResFile.Path, EC, sys::fs::F_None); + if (EC) + fatal(EC, "failed to open " + ResFile.Path); + OS << MB.getBuffer(); + OS.close(); + + E.add(ResFile.Path); + } + E.run(); - return check(MemoryBuffer::getFile(Path), "could not open " + Path); + return File.getMemoryBuffer(); } // Create OptTable @@ -595,7 +641,7 @@ public: }; // Parses a given list of options. -llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { +opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { // First, replace respnose files (@<file>-style options). std::vector<const char *> Argv = replaceResponseFiles(ArgsArr); @@ -603,28 +649,28 @@ llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) { COFFOptTable Table; unsigned MissingIndex; unsigned MissingCount; - llvm::opt::InputArgList Args = - Table.ParseArgs(Argv, MissingIndex, MissingCount); + opt::InputArgList Args = Table.ParseArgs(Argv, MissingIndex, MissingCount); // Print the real command line if response files are expanded. 
if (Args.hasArg(OPT_verbose) && ArgsArr.size() != Argv.size()) { - llvm::outs() << "Command line:"; + outs() << "Command line:"; for (const char *S : Argv) - llvm::outs() << " " << S; - llvm::outs() << "\n"; + outs() << " " << S; + outs() << "\n"; } if (MissingCount) - fatal("missing arg value for \"" + Twine(Args.getArgString(MissingIndex)) + - "\", expected " + Twine(MissingCount) + - (MissingCount == 1 ? " argument." : " arguments.")); + fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); for (auto *Arg : Args.filtered(OPT_UNKNOWN)) - llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; return Args; } -llvm::opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { - // Concatenate LINK env and given arguments and parse them. +// link.exe has an interesting feature. If LINK environment exists, +// its contents are handled as a command line string. So you can pass +// extra arguments using the environment variable. +opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { + // Concatenate LINK env and command line arguments, and then parse them. 
Optional<std::string> Env = Process::GetEnv("LINK"); if (!Env) return parse(Args); @@ -635,8 +681,7 @@ llvm::opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) { std::vector<const char *> ArgParser::tokenize(StringRef S) { SmallVector<const char *, 16> Tokens; - StringSaver Saver(AllocAux); - llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens); + cl::TokenizeWindowsCommandLine(S, Saver, Tokens); return std::vector<const char *>(Tokens.begin(), Tokens.end()); } @@ -645,14 +690,13 @@ std::vector<const char *> ArgParser::tokenize(StringRef S) { std::vector<const char *> ArgParser::replaceResponseFiles(std::vector<const char *> Argv) { SmallVector<const char *, 256> Tokens(Argv.data(), Argv.data() + Argv.size()); - StringSaver Saver(AllocAux); ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens); return std::vector<const char *>(Tokens.begin(), Tokens.end()); } void printHelp(const char *Argv0) { COFFOptTable Table; - Table.PrintHelp(llvm::outs(), Argv0, "LLVM Linker", false); + Table.PrintHelp(outs(), Argv0, "LLVM Linker", false); } } // namespace coff diff --git a/contrib/llvm/tools/lld/COFF/Error.cpp b/contrib/llvm/tools/lld/COFF/Error.cpp index 602a8544ce2b..b2bd557413df 100644 --- a/contrib/llvm/tools/lld/COFF/Error.cpp +++ b/contrib/llvm/tools/lld/COFF/Error.cpp @@ -11,14 +11,31 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace llvm; + namespace lld { namespace coff { void fatal(const Twine &Msg) { - llvm::errs() << Msg << "\n"; - exit(1); + if (sys::Process::StandardErrHasColors()) { + errs().changeColor(raw_ostream::RED, /*bold=*/true); + errs() << "error: "; + errs().resetColor(); + } else { + errs() << "error: "; + } + errs() << Msg << "\n"; + + outs().flush(); + errs().flush(); + _exit(1); } void fatal(std::error_code EC, const Twine &Msg) { diff 
--git a/contrib/llvm/tools/lld/COFF/Error.h b/contrib/llvm/tools/lld/COFF/Error.h index c9f64c662580..47549327db2b 100644 --- a/contrib/llvm/tools/lld/COFF/Error.h +++ b/contrib/llvm/tools/lld/COFF/Error.h @@ -32,6 +32,23 @@ template <class T> T check(Expected<T> E, const Twine &Prefix) { return std::move(*E); } +template <class T> T check(ErrorOr<T> EO) { + if (!EO) + fatal(EO.getError().message()); + return std::move(*EO); +} + +template <class T> T check(Expected<T> E) { + if (!E) { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + logAllUnhandledErrors(E.takeError(), OS, ""); + OS.flush(); + fatal(Buf); + } + return std::move(*E); +} + } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/ICF.cpp b/contrib/llvm/tools/lld/COFF/ICF.cpp index a2c5a90334d0..196fbe2610ea 100644 --- a/contrib/llvm/tools/lld/COFF/ICF.cpp +++ b/contrib/llvm/tools/lld/COFF/ICF.cpp @@ -7,43 +7,19 @@ // //===----------------------------------------------------------------------===// // -// Identical COMDAT Folding is a feature to merge COMDAT sections not by -// name (which is regular COMDAT handling) but by contents. If two COMDAT -// sections have the same data, relocations, attributes, etc., then the two -// are considered identical and merged by the linker. This optimization -// makes outputs smaller. +// ICF is short for Identical Code Folding. That is a size optimization to +// identify and merge two or more read-only sections (typically functions) +// that happened to have the same contents. It usually reduces output size +// by a few percent. // -// ICF is theoretically a problem of reducing graphs by merging as many -// identical subgraphs as possible, if we consider sections as vertices and -// relocations as edges. This may be a bit more complicated problem than you -// might think. The order of processing sections matters since merging two -// sections can make other sections, whose relocations now point to the same -// section, mergeable. 
Graphs may contain cycles, which is common in COFF. -// We need a sophisticated algorithm to do this properly and efficiently. +// On Windows, ICF is enabled by default. // -// What we do in this file is this. We split sections into groups. Sections -// in the same group are considered identical. -// -// First, all sections are grouped by their "constant" values. Constant -// values are values that are never changed by ICF, such as section contents, -// section name, number of relocations, type and offset of each relocation, -// etc. Because we do not care about some relocation targets in this step, -// two sections in the same group may not be identical, but at least two -// sections in different groups can never be identical. -// -// Then, we try to split each group by relocation targets. Relocations are -// considered identical if and only if the relocation targets are in the -// same group. Splitting a group may make more groups to be splittable, -// because two relocations that were previously considered identical might -// now point to different groups. We repeat this step until the convergence -// is obtained. -// -// This algorithm is so-called "optimistic" algorithm described in -// http://research.google.com/pubs/pub36912.html. +// See ELF/ICF.cpp for the details about the algorithm. 
// //===----------------------------------------------------------------------===// #include "Chunks.h" +#include "Error.h" #include "Symbols.h" #include "lld/Core/Parallel.h" #include "llvm/ADT/Hashing.h" @@ -58,29 +34,34 @@ using namespace llvm; namespace lld { namespace coff { -typedef std::vector<SectionChunk *>::iterator ChunkIterator; -typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *); - class ICF { public: void run(const std::vector<Chunk *> &V); private: - static uint64_t getHash(SectionChunk *C); - static bool equalsConstant(const SectionChunk *A, const SectionChunk *B); - static bool equalsVariable(const SectionChunk *A, const SectionChunk *B); - bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq); - bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq); + void segregate(size_t Begin, size_t End, bool Constant); - std::atomic<uint64_t> NextID = { 1 }; -}; + bool equalsConstant(const SectionChunk *A, const SectionChunk *B); + bool equalsVariable(const SectionChunk *A, const SectionChunk *B); -// Entry point to ICF. -void doICF(const std::vector<Chunk *> &Chunks) { - ICF().run(Chunks); -} + uint32_t getHash(SectionChunk *C); + bool isEligible(SectionChunk *C); + + size_t findBoundary(size_t Begin, size_t End); + + void forEachColorRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn); + + void forEachColor(std::function<void(size_t, size_t)> Fn); + + std::vector<SectionChunk *> Chunks; + int Cnt = 0; + std::atomic<uint32_t> NextId = {1}; + std::atomic<bool> Repeat = {false}; +}; -uint64_t ICF::getHash(SectionChunk *C) { +// Returns a hash value for S. 
+uint32_t ICF::getHash(SectionChunk *C) { return hash_combine(C->getPermissions(), hash_value(C->SectionName), C->NumRelocs, @@ -89,16 +70,44 @@ uint64_t ICF::getHash(SectionChunk *C) { C->Checksum); } -bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { - if (A->AssocChildren.size() != B->AssocChildren.size() || - A->NumRelocs != B->NumRelocs) { - return false; +// Returns true if section S is subject of ICF. +bool ICF::isEligible(SectionChunk *C) { + bool Global = C->Sym && C->Sym->isExternal(); + bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; + return C->isCOMDAT() && C->isLive() && Global && !Writable; +} + +// Split a range into smaller ranges by recoloring sections +void ICF::segregate(size_t Begin, size_t End, bool Constant) { + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. + auto Bound = std::stable_partition( + Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) { + if (Constant) + return equalsConstant(Chunks[Begin], S); + return equalsVariable(Chunks[Begin], S); + }); + size_t Mid = Bound - Chunks.begin(); + + // Split [Begin, End) into [Begin, Mid) and [Mid, End). + uint32_t Id = NextId++; + for (size_t I = Begin; I < Mid; ++I) + Chunks[I]->Color[(Cnt + 1) % 2] = Id; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; } +} - // Compare associative sections. - for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) - if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) - return false; +// Compare "non-moving" part of two sections, namely everything +// except relocation targets. +bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { + if (A->NumRelocs != B->NumRelocs) + return false; // Compare relocations. 
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { @@ -106,14 +115,14 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { R1.VirtualAddress != R2.VirtualAddress) { return false; } - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast<DefinedRegular>(B1)) if (auto *D2 = dyn_cast<DefinedRegular>(B2)) return D1->getValue() == D2->getValue() && - D1->getChunk()->GroupID == D2->getChunk()->GroupID; + D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2]; return false; }; if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq)) @@ -128,54 +137,57 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { A->getContents() == B->getContents(); } +// Compare "moving" part of two sections, namely relocation targets. bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { - // Compare associative sections. - for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) - if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) - return false; - // Compare relocations. 
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast<DefinedRegular>(B1)) if (auto *D2 = dyn_cast<DefinedRegular>(B2)) - return D1->getChunk()->GroupID == D2->getChunk()->GroupID; + return D1->getChunk()->Color[Cnt % 2] == D2->getChunk()->Color[Cnt % 2]; return false; }; return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); } -bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) { - bool R = false; - for (auto It = Begin;;) { - SectionChunk *Head = *It; - auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) { - return Eq(Head, SC); - }); - if (Bound == End) - return R; - uint64_t ID = NextID++; - std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; }); - It = Bound; - R = true; +size_t ICF::findBoundary(size_t Begin, size_t End) { + for (size_t I = Begin + 1; I < End; ++I) + if (Chunks[Begin]->Color[Cnt % 2] != Chunks[I]->Color[Cnt % 2]) + return I; + return End; +} + +void ICF::forEachColorRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Chunks.size()); + Fn(Begin, Mid); + Begin = Mid; } } -bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) { - bool R = false; - for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { - SectionChunk *Head = *It; - auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) { - return SC->GroupID != Head->GroupID; - }); - if (segregate(It, Bound, Eq)) - R = true; - It = Bound; +// Call Fn on each color group. 
+void ICF::forEachColor(std::function<void(size_t, size_t)> Fn) { + // If the number of sections are too small to use threading, + // call Fn sequentially. + if (Chunks.size() < 1024) { + forEachColorRange(0, Chunks.size(), Fn); + return; } - return R; + + // Split sections into 256 shards and call Fn in parallel. + size_t NumShards = 256; + size_t Step = Chunks.size() / NumShards; + parallel_for(size_t(0), NumShards, [&](size_t I) { + forEachColorRange(I * Step, (I + 1) * Step, Fn); + }); + forEachColorRange(Step * NumShards, Chunks.size(), Fn); } // Merge identical COMDAT sections. @@ -183,62 +195,62 @@ bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) { // contents and relocations are all the same. void ICF::run(const std::vector<Chunk *> &Vec) { // Collect only mergeable sections and group by hash value. - parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) { - if (auto *SC = dyn_cast<SectionChunk>(C)) { - bool Global = SC->Sym && SC->Sym->isExternal(); - bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; - if (SC->isCOMDAT() && SC->isLive() && Global && !Writable) - SC->GroupID = getHash(SC) | (uint64_t(1) << 63); - } - }); - std::vector<SectionChunk *> Chunks; for (Chunk *C : Vec) { - if (auto *SC = dyn_cast<SectionChunk>(C)) { - if (SC->GroupID) { - Chunks.push_back(SC); - } else { - SC->GroupID = NextID++; - } + auto *SC = dyn_cast<SectionChunk>(C); + if (!SC) + continue; + + if (isEligible(SC)) { + // Set MSB to 1 to avoid collisions with non-hash colors. + SC->Color[0] = getHash(SC) | (1 << 31); + Chunks.push_back(SC); + } else { + SC->Color[0] = NextId++; } } + if (Chunks.empty()) + return; + // From now on, sections in Chunks are ordered so that sections in // the same group are consecutive in the vector. - std::sort(Chunks.begin(), Chunks.end(), - [](SectionChunk *A, SectionChunk *B) { - return A->GroupID < B->GroupID; - }); - - // Split groups until we get a convergence. 
- int Cnt = 1; - forEachGroup(Chunks, equalsConstant); - - for (;;) { - if (!forEachGroup(Chunks, equalsVariable)) - break; + std::stable_sort(Chunks.begin(), Chunks.end(), + [](SectionChunk *A, SectionChunk *B) { + return A->Color[0] < B->Color[0]; + }); + + // Compare static contents and assign unique IDs for each static content. + forEachColor([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); + ++Cnt; + + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachColor( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); ++Cnt; - } + } while (Repeat); + if (Config->Verbose) - llvm::outs() << "\nICF needed " << Cnt << " iterations.\n"; - - // Merge sections in the same group. - for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { - SectionChunk *Head = *It++; - auto Bound = std::find_if(It, End, [&](SectionChunk *SC) { - return Head->GroupID != SC->GroupID; - }); - if (It == Bound) - continue; + outs() << "\nICF needed " << Cnt << " iterations\n"; + + // Merge sections in the same colors. + forEachColor([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + if (Config->Verbose) - llvm::outs() << "Selected " << Head->getDebugName() << "\n"; - while (It != Bound) { - SectionChunk *SC = *It++; + outs() << "Selected " << Chunks[Begin]->getDebugName() << "\n"; + for (size_t I = Begin + 1; I < End; ++I) { if (Config->Verbose) - llvm::outs() << " Removed " << SC->getDebugName() << "\n"; - Head->replace(SC); + outs() << " Removed " << Chunks[I]->getDebugName() << "\n"; + Chunks[Begin]->replace(Chunks[I]); } - } + }); } +// Entry point to ICF. 
+void doICF(const std::vector<Chunk *> &Chunks) { ICF().run(Chunks); } + } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.cpp b/contrib/llvm/tools/lld/COFF/InputFiles.cpp index ff26826371fa..0a97c2185f89 100644 --- a/contrib/llvm/tools/lld/COFF/InputFiles.cpp +++ b/contrib/llvm/tools/lld/COFF/InputFiles.cpp @@ -7,11 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "InputFiles.h" #include "Chunks.h" #include "Config.h" +#include "Driver.h" #include "Error.h" -#include "InputFiles.h" +#include "Memory.h" +#include "SymbolTable.h" #include "Symbols.h" +#include "llvm-c/lto.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -26,88 +30,58 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Target/TargetOptions.h" -#include "llvm-c/lto.h" #include <cstring> #include <system_error> #include <utility> +using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support::endian; using llvm::Triple; using llvm::support::ulittle32_t; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; namespace lld { namespace coff { -int InputFile::NextIndex = 0; -llvm::LLVMContext BitcodeFile::Context; - -// Returns the last element of a path, which is supposed to be a filename. -static StringRef getBasename(StringRef Path) { - size_t Pos = Path.find_last_of("\\/"); - if (Pos == StringRef::npos) - return Path; - return Path.substr(Pos + 1); -} +LLVMContext BitcodeFile::Context; -// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 
-std::string InputFile::getShortName() { - if (ParentName == "") - return getName().lower(); - std::string Res = (getBasename(ParentName) + "(" + - getBasename(getName()) + ")").str(); - return StringRef(Res).lower(); -} +ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. - File = check(Archive::create(MB), "failed to parse static library"); - - // Allocate a buffer for Lazy objects. - size_t NumSyms = File->getNumberOfSymbols(); - LazySymbols.reserve(NumSyms); + File = check(Archive::create(MB), toString(this)); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) - LazySymbols.emplace_back(this, Sym); - - // Seen is a map from member files to boolean values. Initially - // all members are mapped to false, which indicates all these files - // are not read yet. - Error Err; - for (auto &Child : File->children(Err)) - Seen[Child.getChildOffset()].clear(); - if (Err) - fatal(Err, "failed to parse static library"); + Symtab->addLazy(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. -// This function is thread-safe. -MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { +void ArchiveFile::addMember(const Archive::Symbol *Sym) { const Archive::Child &C = check(Sym->getMember(), "could not get the member for symbol " + Sym->getName()); // Return an empty buffer if we have already returned the same buffer. - if (Seen[C.getChildOffset()].test_and_set()) - return MemoryBufferRef(); - return check(C.getMemoryBufferRef(), - "could not get the buffer for the member defining symbol " + - Sym->getName()); + if (!Seen.insert(C.getChildOffset()).second) + return; + + Driver->enqueueArchiveMember(C, Sym->getName(), getName()); } void ObjectFile::parse() { // Parse a memory buffer as a COFF file. 
- std::unique_ptr<Binary> Bin = - check(createBinary(MB), "failed to parse object file"); + std::unique_ptr<Binary> Bin = check(createBinary(MB), toString(this)); if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { Bin.release(); COFFObj.reset(Obj); } else { - fatal(getName() + " is not a COFF file"); + fatal(toString(this) + " is not a COFF file"); } // Read section and symbol tables. @@ -137,13 +111,28 @@ void ObjectFile::initializeChunks() { Directives = std::string((const char *)Data.data(), Data.size()); continue; } - // Skip non-DWARF debug info. MSVC linker converts the sections into - // a PDB file, but we don't support that. - if (Name == ".debug" || Name.startswith(".debug$")) - continue; - // We want to preserve DWARF debug sections only when /debug is on. + + // Object files may have DWARF debug info or MS CodeView debug info + // (or both). + // + // DWARF sections don't need any special handling from the perspective + // of the linker; they are just a data section containing relocations. + // We can just link them to complete debug info. + // + // CodeView needs a linker support. We need to interpret and debug + // info, and then write it to a separate .pdb file. + + // Ignore debug info unless /debug is given. if (!Config->Debug && Name.startswith(".debug")) continue; + + // CodeView sections are stored to a different vector because they are + // not linked in the regular manner. 
+ if (Name == ".debug" || Name.startswith(".debug$")) { + DebugChunks.push_back(new (Alloc) SectionChunk(this, Sec)); + continue; + } + if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) continue; auto *C = new (Alloc) SectionChunk(this, Sec); @@ -156,12 +145,14 @@ void ObjectFile::initializeSymbols() { uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); SymbolBodies.reserve(NumSymbols); SparseSymbolBodies.resize(NumSymbols); - llvm::SmallVector<std::pair<Undefined *, uint32_t>, 8> WeakAliases; + SmallVector<std::pair<SymbolBody *, uint32_t>, 8> WeakAliases; int32_t LastSectionNumber = 0; for (uint32_t I = 0; I < NumSymbols; ++I) { // Get a COFFSymbolRef object. - COFFSymbolRef Sym = - check(COFFObj->getSymbol(I), "broken object file: " + getName()); + ErrorOr<COFFSymbolRef> SymOrErr = COFFObj->getSymbol(I); + if (!SymOrErr) + fatal(SymOrErr.getError(), "broken object file: " + toString(this)); + COFFSymbolRef Sym = *SymOrErr; const void *AuxP = nullptr; if (Sym.getNumberOfAuxSymbols()) @@ -175,7 +166,7 @@ void ObjectFile::initializeSymbols() { Body = createUndefined(Sym); uint32_t TagIndex = static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex; - WeakAliases.emplace_back((Undefined *)Body, TagIndex); + WeakAliases.emplace_back(Body, TagIndex); } else { Body = createDefined(Sym, AuxP, IsFirst); } @@ -186,23 +177,30 @@ void ObjectFile::initializeSymbols() { I += Sym.getNumberOfAuxSymbols(); LastSectionNumber = Sym.getSectionNumber(); } - for (auto WeakAlias : WeakAliases) - WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + for (auto WeakAlias : WeakAliases) { + auto *U = dyn_cast<Undefined>(WeakAlias.first); + if (!U) + continue; + // Report an error if two undefined symbols have different weak aliases. 
+ if (U->WeakAlias && U->WeakAlias != SparseSymbolBodies[WeakAlias.second]) + Symtab->reportDuplicate(U->symbol(), this); + U->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + } } -Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { +SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) { StringRef Name; COFFObj->getSymbolName(Sym, Name); - return new (Alloc) Undefined(Name); + return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body(); } -Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, - bool IsFirst) { +SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, + bool IsFirst) { StringRef Name; if (Sym.isCommon()) { auto *C = new (Alloc) CommonChunk(Sym); Chunks.push_back(C); - return new (Alloc) DefinedCommon(this, Sym, C); + return Symtab->addCommon(this, Sym, C)->body(); } if (Sym.isAbsolute()) { COFFObj->getSymbolName(Sym, Name); @@ -215,7 +213,10 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, SEHCompat = true; return nullptr; } - return new (Alloc) DefinedAbsolute(Name, Sym); + if (Sym.isExternal()) + return Symtab->addAbsolute(Name, Sym)->body(); + else + return new (Alloc) DefinedAbsolute(Name, Sym); } int32_t SectionNumber = Sym.getSectionNumber(); if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) @@ -223,12 +224,12 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, // Reserved sections numbers don't have contents. if (llvm::COFF::isReservedSectionNumber(SectionNumber)) - fatal("broken object file: " + getName()); + fatal("broken object file: " + toString(this)); // This symbol references a section which is not present in the section // header. if ((uint32_t)SectionNumber >= SparseChunks.size()) - fatal("broken object file: " + getName()); + fatal("broken object file: " + toString(this)); // Nothing else to do without a section chunk. 
auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]); @@ -245,7 +246,11 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, SC->Checksum = Aux->CheckSum; } - auto *B = new (Alloc) DefinedRegular(this, Sym, SC); + DefinedRegular *B; + if (Sym.isExternal()) + B = cast<DefinedRegular>(Symtab->addRegular(this, Sym, SC)->body()); + else + B = new (Alloc) DefinedRegular(this, Sym, SC); if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP) SC->setSymbol(B); @@ -307,28 +312,29 @@ void ImportFile::parse() { ExtName = ExtName.substr(0, ExtName.find('@')); break; } - ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr); - SymbolBodies.push_back(ImpSym); + + this->Hdr = Hdr; + ExternalName = ExtName; + + ImpSym = cast<DefinedImportData>( + Symtab->addImportData(ImpName, this)->body()); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (Hdr->getType() != llvm::COFF::IMPORT_CODE) return; - ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine); - SymbolBodies.push_back(ThunkSym); + ThunkSym = cast<DefinedImportThunk>( + Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body()); } void BitcodeFile::parse() { - // Usually parse() is thread-safe, but bitcode file is an exception. 
- std::lock_guard<std::mutex> Lock(Mu); - Context.enableDebugTypeODRUniquing(); ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = LTOModule::createFromBuffer( Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); M = check(std::move(ModOrErr), "could not create LTO module"); - llvm::StringSaver Saver(Alloc); + StringSaver Saver(Alloc); for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { lto_symbol_attributes Attrs = M->getSymbolAttributes(I); if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) @@ -337,15 +343,15 @@ void BitcodeFile::parse() { StringRef SymName = Saver.save(M->getSymbolName(I)); int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { - SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + SymbolBodies.push_back(Symtab->addUndefined(SymName, this, false)->body()); } else { bool Replaceable = (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common (Attrs & LTO_SYMBOL_COMDAT) || // comdat (SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external (Attrs & LTO_SYMBOL_ALIAS))); - SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName, - Replaceable)); + SymbolBodies.push_back( + Symtab->addBitcode(this, SymName, Replaceable)->body()); } } @@ -367,7 +373,26 @@ MachineTypes BitcodeFile::getMachineType() { } } -std::mutex BitcodeFile::Mu; +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef Path) { + size_t Pos = Path.find_last_of("\\/"); + if (Pos == StringRef::npos) + return Path; + return Path.substr(Pos + 1); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 
+std::string toString(InputFile *File) { + if (!File) + return "(internal)"; + if (File->ParentName.empty()) + return File->getName().lower(); + + std::string Res = + (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + ")") + .str(); + return StringRef(Res).lower(); +} } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.h b/contrib/llvm/tools/lld/COFF/InputFiles.h index 0ec01b5075f9..498a1743e985 100644 --- a/contrib/llvm/tools/lld/COFF/InputFiles.h +++ b/contrib/llvm/tools/lld/COFF/InputFiles.h @@ -12,13 +12,13 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/legacy/LTOModule.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Support/StringSaver.h" #include <memory> -#include <mutex> #include <set> #include <vector> @@ -31,6 +31,7 @@ using llvm::COFF::MachineTypes; using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; +using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; @@ -38,6 +39,8 @@ class Defined; class DefinedImportData; class DefinedImportThunk; class Lazy; +class SectionChunk; +struct Symbol; class SymbolBody; class Undefined; @@ -51,67 +54,44 @@ public: // Returns the filename. StringRef getName() { return MB.getBufferIdentifier(); } - // Returns symbols defined by this file. - virtual std::vector<SymbolBody *> &getSymbols() = 0; - // Reads a file (the constructor doesn't do that). virtual void parse() = 0; // Returns the CPU type this file was compiled to. virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } - // Returns a short, human-friendly filename. If this is a member of - // an archive file, a returned value includes parent's filename. - // Used for logging or debugging. 
- std::string getShortName(); - - // Sets a parent filename if this file is created from an archive. - void setParentName(StringRef N) { ParentName = N; } + // An archive file name if this file is created from an archive. + StringRef ParentName; // Returns .drectve section contents if exist. StringRef getDirectives() { return StringRef(Directives).trim(); } - // Each file has a unique index. The index number is used to - // resolve ties in symbol resolution. - int Index; - static int NextIndex; - protected: - InputFile(Kind K, MemoryBufferRef M) - : Index(NextIndex++), MB(M), FileKind(K) {} + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} MemoryBufferRef MB; std::string Directives; private: const Kind FileKind; - StringRef ParentName; }; // .lib or .a file. class ArchiveFile : public InputFile { public: - explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + explicit ArchiveFile(MemoryBufferRef M); static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } void parse() override; - // Returns a memory buffer for a given symbol. An empty memory buffer - // is returned if we have already returned the same memory buffer. - // (So that we don't instantiate same members more than once.) - MemoryBufferRef getMember(const Archive::Symbol *Sym); - - llvm::MutableArrayRef<Lazy> getLazySymbols() { return LazySymbols; } - - // All symbols returned by ArchiveFiles are of Lazy type. - std::vector<SymbolBody *> &getSymbols() override { - llvm_unreachable("internal fatal"); - } + // Enqueues an archive member load for the given symbol. If we've already + // enqueued a load for the same archive member, this function does nothing, + // which ensures that we don't load the same member more than once. 
+ void addMember(const Archive::Symbol *Sym); private: std::unique_ptr<Archive> File; std::string Filename; - std::vector<Lazy> LazySymbols; - std::map<uint64_t, std::atomic_flag> Seen; + llvm::DenseSet<uint64_t> Seen; }; // .obj or .o file. This may be a member of an archive file. @@ -122,7 +102,8 @@ public: void parse() override; MachineTypes getMachineType() override; std::vector<Chunk *> &getChunks() { return Chunks; } - std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + std::vector<SectionChunk *> &getDebugChunks() { return DebugChunks; } + std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; } // Returns a SymbolBody object for the SymbolIndex'th symbol in the // underlying object file. @@ -146,8 +127,8 @@ private: void initializeSymbols(); void initializeSEH(); - Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); - Undefined *createUndefined(COFFSymbolRef Sym); + SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); + SymbolBody *createUndefined(COFFSymbolRef Sym); std::unique_ptr<COFFObjectFile> COFFObj; llvm::BumpPtrAllocator Alloc; @@ -157,6 +138,9 @@ private: // chunks and non-section chunks for common symbols. std::vector<Chunk *> Chunks; + // CodeView debug info sections. + std::vector<SectionChunk *> DebugChunks; + // This vector contains the same chunks as Chunks, but they are // indexed such that you can get a SectionChunk by section index. // Nonexistent section indices are filled with null pointers. 
@@ -182,7 +166,6 @@ public: explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M), StringAlloc(StringAllocAux) {} static bool classof(const InputFile *F) { return F->kind() == ImportKind; } - std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } DefinedImportData *ImpSym = nullptr; DefinedImportThunk *ThunkSym = nullptr; @@ -191,10 +174,14 @@ public: private: void parse() override; - std::vector<SymbolBody *> SymbolBodies; llvm::BumpPtrAllocator Alloc; llvm::BumpPtrAllocator StringAllocAux; llvm::StringSaver StringAlloc; + +public: + StringRef ExternalName; + const coff_import_header *Hdr; + Chunk *Location = nullptr; }; // Used for LTO. @@ -202,7 +189,7 @@ class BitcodeFile : public InputFile { public: explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } - std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; } MachineTypes getMachineType() override; std::unique_ptr<LTOModule> takeModule() { return std::move(M); } @@ -214,9 +201,10 @@ private: std::vector<SymbolBody *> SymbolBodies; llvm::BumpPtrAllocator Alloc; std::unique_ptr<LTOModule> M; - static std::mutex Mu; }; +std::string toString(InputFile *File); + } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Librarian.cpp b/contrib/llvm/tools/lld/COFF/Librarian.cpp index 25fb4a87b3eb..4c597fad7345 100644 --- a/contrib/llvm/tools/lld/COFF/Librarian.cpp +++ b/contrib/llvm/tools/lld/COFF/Librarian.cpp @@ -54,7 +54,7 @@ static uint16_t getImgRelRelocation() { } } -template <class T> void append(std::vector<uint8_t> &B, const T &Data) { +template <class T> static void append(std::vector<uint8_t> &B, const T &Data) { size_t S = B.size(); B.resize(S + sizeof(T)); memcpy(&B[S], &Data, sizeof(T)); @@ -352,15 +352,16 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> 
&Buffer) { NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { static const uint32_t NumberOfSections = 2; static const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit() ? 4 : 8; // COFF Header coff_file_header Header{ u16(Config->Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + // .idata$5 - sizeof(export_address_table_entry) + + VASize + // .idata$4 - sizeof(export_address_table_entry)), + VASize), u32(NumberOfSymbols), u16(0), u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0), }; @@ -371,36 +372,40 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, u32(0), u32(0), - u32(sizeof(export_address_table_entry)), + u32(VASize), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), u32(0), u32(0), u16(0), u16(0), - u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | - IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + u32((is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, u32(0), u32(0), - u32(sizeof(export_address_table_entry)), + u32(VASize), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + - sizeof(export_address_table_entry)), + VASize), u32(0), u32(0), u16(0), u16(0), - u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | - IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + u32((is32bit() ? 
IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, }; append(Buffer, SectionTable); - // .idata$5 - static const export_address_table_entry ILT{u32(0)}; - append(Buffer, ILT); + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit()) + append(Buffer, u32(0)); - // .idata$4 - static const export_address_table_entry IAT{u32(0)}; - append(Buffer, IAT); + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit()) + append(Buffer, u32(0)); // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { @@ -458,7 +463,7 @@ void lld::coff::writeImportLibrary() { std::vector<NewArchiveMember> Members; std::string Path = getImplibPath(); - std::string DLLName = llvm::sys::path::filename(Config->OutputFile); + std::string DLLName = sys::path::filename(Config->OutputFile); ObjectFactory OF(DLLName); std::vector<uint8_t> ImportDescriptor; diff --git a/contrib/llvm/tools/lld/COFF/MarkLive.cpp b/contrib/llvm/tools/lld/COFF/MarkLive.cpp index 0870986ad81a..0156d238b672 100644 --- a/contrib/llvm/tools/lld/COFF/MarkLive.cpp +++ b/contrib/llvm/tools/lld/COFF/MarkLive.cpp @@ -38,8 +38,8 @@ void markLive(const std::vector<Chunk *> &Chunks) { }; // Add GC root chunks. - for (Undefined *U : Config->GCRoot) - if (auto *D = dyn_cast<DefinedRegular>(U->repl())) + for (SymbolBody *B : Config->GCRoot) + if (auto *D = dyn_cast<DefinedRegular>(B)) Enqueue(D->getChunk()); while (!Worklist.empty()) { @@ -48,7 +48,7 @@ void markLive(const std::vector<Chunk *> &Chunks) { // Mark all symbols listed in the relocation table for this section. for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast<DefinedRegular>(S->repl())) + if (auto *D = dyn_cast<DefinedRegular>(S)) Enqueue(D->getChunk()); // Mark associative sections if any. 
diff --git a/contrib/llvm/tools/lld/COFF/Memory.h b/contrib/llvm/tools/lld/COFF/Memory.h new file mode 100644 index 000000000000..526f11344a09 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Memory.h @@ -0,0 +1,52 @@ +//===- Memory.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// See ELF/Memory.h +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_MEMORY_H +#define LLD_COFF_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { +namespace coff { + +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +template <typename T, typename... U> T *make(U &&... 
Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/COFF/ModuleDef.cpp b/contrib/llvm/tools/lld/COFF/ModuleDef.cpp index 5e393f45d184..a273b6f535db 100644 --- a/contrib/llvm/tools/lld/COFF/ModuleDef.cpp +++ b/contrib/llvm/tools/lld/COFF/ModuleDef.cpp @@ -18,6 +18,7 @@ #include "Config.h" #include "Error.h" +#include "Memory.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/StringSaver.h" @@ -113,7 +114,7 @@ private: class Parser { public: - explicit Parser(StringRef S, StringSaver *A) : Lex(S), Alloc(A) {} + explicit Parser(StringRef S) : Lex(S) {} void parse() { do { @@ -197,9 +198,9 @@ private: if (Config->Machine == I386) { if (!isDecorated(E.Name)) - E.Name = Alloc->save("_" + E.Name); + E.Name = Saver.save("_" + E.Name); if (!E.ExtName.empty() && !isDecorated(E.ExtName)) - E.ExtName = Alloc->save("_" + E.ExtName); + E.ExtName = Saver.save("_" + E.ExtName); } for (;;) { @@ -278,14 +279,11 @@ private: Lexer Lex; Token Tok; std::vector<Token> Stack; - StringSaver *Alloc; }; } // anonymous namespace -void parseModuleDefs(MemoryBufferRef MB, StringSaver *Alloc) { - Parser(MB.getBuffer(), Alloc).parse(); -} +void parseModuleDefs(MemoryBufferRef MB) { Parser(MB.getBuffer()).parse(); } } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Options.td b/contrib/llvm/tools/lld/COFF/Options.td index e5c9c5b4635b..9dfbcc8e188c 100644 --- a/contrib/llvm/tools/lld/COFF/Options.td +++ b/contrib/llvm/tools/lld/COFF/Options.td @@ -27,6 +27,7 @@ def failifmismatch : P<"failifmismatch", "">; def heap : P<"heap", "Size of the heap">; def implib : P<"implib", "Import library name">; def libpath : P<"libpath", "Additional library search path">; +def linkrepro : P<"linkrepro", 
"Dump linker invocation and input files for debugging">; def machine : P<"machine", "Specify target platform">; def merge : P<"merge", "Combine sections">; def mllvm : P<"mllvm", "Options to pass to LLVM">; @@ -61,7 +62,9 @@ def deffile : Joined<["/", "-"], "def:">, HelpText<"Use module-definition file">; def debug : F<"debug">, HelpText<"Embed a symbol table in the image">; +def debugtype : P<"debugtype", "Debug Info Options">; def dll : F<"dll">, HelpText<"Create a DLL">; +def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">; def nodefaultlib_all : F<"nodefaultlib">; def noentry : F<"noentry">; def profile : F<"profile">; @@ -91,7 +94,10 @@ def help_q : Flag<["/?", "-?"], "">, Alias<help>; def nosymtab : F<"nosymtab">; // Flags for debugging -def lldmap : Joined<["/", "-"], "lldmap:">; +def debugpdb : F<"debugpdb">; +def dumppdb : Joined<["/", "-"], "dumppdb">; +def lldmap : F<"lldmap">; +def lldmap_file : Joined<["/", "-"], "lldmap:">; //============================================================================== // The flags below do nothing. They are defined only for link.exe compatibility. 
diff --git a/contrib/llvm/tools/lld/COFF/PDB.cpp b/contrib/llvm/tools/lld/COFF/PDB.cpp index 7606ccc680d3..56d5a3651143 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.cpp +++ b/contrib/llvm/tools/lld/COFF/PDB.cpp @@ -7,55 +7,187 @@ // //===----------------------------------------------------------------------===// -#include "Driver.h" +#include "PDB.h" +#include "Chunks.h" +#include "Config.h" #include "Error.h" +#include "SymbolTable.h" #include "Symbols.h" +#include "llvm/DebugInfo/CodeView/SymbolDumper.h" +#include "llvm/DebugInfo/CodeView/TypeDumper.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/ScopedPrinter.h" #include <memory> +using namespace lld; +using namespace lld::coff; using namespace llvm; +using namespace llvm::codeview; using namespace llvm::support; using namespace llvm::support::endian; -const int PageSize = 4096; -const uint8_t Magic[32] = "Microsoft C/C++ MSF 7.00\r\n\032DS\0\0"; - -namespace { -struct PDBHeader { - uint8_t Magic[32]; - ulittle32_t PageSize; - ulittle32_t FpmPage; - ulittle32_t PageCount; - ulittle32_t RootSize; - ulittle32_t Reserved; - ulittle32_t RootPointer; -}; -} - -void lld::coff::createPDB(StringRef Path) { - // Create a file. 
- size_t FileSize = PageSize * 3; - ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Path, FileSize); - if (auto EC = BufferOrErr.getError()) - fatal(EC, "failed to open " + Path); - std::unique_ptr<FileOutputBuffer> Buffer = std::move(*BufferOrErr); - - // Write the file header. - uint8_t *Buf = Buffer->getBufferStart(); - auto *Hdr = reinterpret_cast<PDBHeader *>(Buf); - memcpy(Hdr->Magic, Magic, sizeof(Magic)); - Hdr->PageSize = PageSize; - // I don't know what FpmPage field means, but it must not be 0. - Hdr->FpmPage = 1; - Hdr->PageCount = FileSize / PageSize; - // Root directory is empty, containing only the length field. - Hdr->RootSize = 4; - // Root directory is on page 1. - Hdr->RootPointer = 1; - - // Write the root directory. Root stream is on page 2. - write32le(Buf + PageSize, 2); - Buffer->commit(); +using llvm::object::coff_section; + +static ExitOnError ExitOnErr; + +// Returns a list of all SectionChunks. +static std::vector<coff_section> getInputSections(SymbolTable *Symtab) { + std::vector<coff_section> V; + for (Chunk *C : Symtab->getChunks()) + if (auto *SC = dyn_cast<SectionChunk>(C)) + V.push_back(*SC->Header); + return V; +} + +static SectionChunk *findByName(std::vector<SectionChunk *> &Sections, + StringRef Name) { + for (SectionChunk *C : Sections) + if (C->getSectionName() == Name) + return C; + return nullptr; +} + +static ArrayRef<uint8_t> getDebugT(ObjectFile *File) { + SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$T"); + if (!Sec) + return {}; + + // First 4 bytes are section magic. 
+ ArrayRef<uint8_t> Data = Sec->getContents(); + if (Data.size() < 4) + fatal(".debug$T too short"); + if (read32le(Data.data()) != COFF::DEBUG_SECTION_MAGIC) + fatal(".debug$T has an invalid magic"); + return Data.slice(4); +} + +static void dumpDebugT(ScopedPrinter &W, ObjectFile *File) { + ArrayRef<uint8_t> Data = getDebugT(File); + if (Data.empty()) + return; + + msf::ByteStream Stream(Data); + CVTypeDumper TypeDumper(&W, false); + if (auto EC = TypeDumper.dump(Data)) + fatal(EC, "CVTypeDumper::dump failed"); +} + +static void dumpDebugS(ScopedPrinter &W, ObjectFile *File) { + SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$S"); + if (!Sec) + return; + + msf::ByteStream Stream(Sec->getContents()); + CVSymbolArray Symbols; + msf::StreamReader Reader(Stream); + if (auto EC = Reader.readArray(Symbols, Reader.getLength())) + fatal(EC, "StreamReader.readArray<CVSymbolArray> failed"); + + CVTypeDumper TypeDumper(&W, false); + CVSymbolDumper SymbolDumper(W, TypeDumper, nullptr, false); + if (auto EC = SymbolDumper.dump(Symbols)) + fatal(EC, "CVSymbolDumper::dump failed"); +} + +// Dump CodeView debug info. This is for debugging. +static void dumpCodeView(SymbolTable *Symtab) { + ScopedPrinter W(outs()); + + for (ObjectFile *File : Symtab->ObjectFiles) { + dumpDebugT(W, File); + dumpDebugS(W, File); + } +} + +static void addTypeInfo(SymbolTable *Symtab, + pdb::TpiStreamBuilder &TpiBuilder) { + for (ObjectFile *File : Symtab->ObjectFiles) { + ArrayRef<uint8_t> Data = getDebugT(File); + if (Data.empty()) + continue; + + msf::ByteStream Stream(Data); + codeview::CVTypeArray Records; + msf::StreamReader Reader(Stream); + if (auto EC = Reader.readArray(Records, Reader.getLength())) + fatal(EC, "Reader.readArray failed"); + for (const codeview::CVType &Rec : Records) + TpiBuilder.addTypeRecord(Rec); + } +} + +// Creates a PDB file. 
+void coff::createPDB(StringRef Path, SymbolTable *Symtab, + ArrayRef<uint8_t> SectionTable) { + if (Config->DumpPdb) + dumpCodeView(Symtab); + + BumpPtrAllocator Alloc; + pdb::PDBFileBuilder Builder(Alloc); + ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize + + // Create streams in MSF for predefined streams, namely + // PDB, TPI, DBI and IPI. + for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I) + ExitOnErr(Builder.getMsfBuilder().addStream(0)); + + // Add an Info stream. + auto &InfoBuilder = Builder.getInfoBuilder(); + InfoBuilder.setAge(1); + + // Should be a random number, 0 for now. + InfoBuilder.setGuid({}); + + // Should be the current time, but set 0 for reproducibility. + InfoBuilder.setSignature(0); + InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); + + // Add an empty DBI stream. + auto &DbiBuilder = Builder.getDbiBuilder(); + DbiBuilder.setVersionHeader(pdb::PdbDbiV110); + + // Add an empty TPI stream. + auto &TpiBuilder = Builder.getTpiBuilder(); + TpiBuilder.setVersionHeader(pdb::PdbTpiV80); + if (Config->DebugPdb) + addTypeInfo(Symtab, TpiBuilder); + + // Add an empty IPI stream. + auto &IpiBuilder = Builder.getIpiBuilder(); + IpiBuilder.setVersionHeader(pdb::PdbTpiV80); + + // Add Section Contributions. + std::vector<pdb::SectionContrib> Contribs = + pdb::DbiStreamBuilder::createSectionContribs(getInputSections(Symtab)); + DbiBuilder.setSectionContribs(Contribs); + + // Add Section Map stream. + ArrayRef<object::coff_section> Sections = { + (const object::coff_section *)SectionTable.data(), + SectionTable.size() / sizeof(object::coff_section)}; + std::vector<pdb::SecMapEntry> SectionMap = + pdb::DbiStreamBuilder::createSectionMap(Sections); + DbiBuilder.setSectionMap(SectionMap); + + ExitOnErr(DbiBuilder.addModuleInfo("", "* Linker *")); + + // Add COFF section header stream. + ExitOnErr( + DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable)); + + // Write to a file. 
+ ExitOnErr(Builder.commit(Path)); } diff --git a/contrib/llvm/tools/lld/COFF/PDB.h b/contrib/llvm/tools/lld/COFF/PDB.h new file mode 100644 index 000000000000..091e90fa1ef1 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/PDB.h @@ -0,0 +1,25 @@ +//===- PDB.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_PDB_H +#define LLD_COFF_PDB_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace lld { +namespace coff { +class SymbolTable; + +void createPDB(llvm::StringRef Path, SymbolTable *Symtab, + llvm::ArrayRef<uint8_t> SectionTable); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/COFF/Strings.cpp b/contrib/llvm/tools/lld/COFF/Strings.cpp new file mode 100644 index 000000000000..d0558413f673 --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Strings.cpp @@ -0,0 +1,30 @@ +//===- Strings.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Strings.h" + +#if defined(_MSC_VER) +#include <Windows.h> +#include <DbgHelp.h> +#pragma comment(lib, "dbghelp.lib") +#endif + +using namespace lld; +using namespace lld::coff; +using namespace llvm; + +Optional<std::string> coff::demangle(StringRef S) { +#if defined(_MSC_VER) + char Buf[4096]; + if (S.startswith("?")) + if (size_t Len = UnDecorateSymbolName(S.str().c_str(), Buf, sizeof(Buf), 0)) + return std::string(Buf, Len); +#endif + return None; +} diff --git a/contrib/llvm/tools/lld/COFF/Strings.h b/contrib/llvm/tools/lld/COFF/Strings.h new file mode 100644 index 000000000000..1f85f3e2da5c --- /dev/null +++ b/contrib/llvm/tools/lld/COFF/Strings.h @@ -0,0 +1,23 @@ +//===- Strings.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_STRINGS_H +#define LLD_COFF_STRINGS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include <string> + +namespace lld { +namespace coff { +llvm::Optional<std::string> demangle(llvm::StringRef S); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.cpp b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp index df9da4c36650..9cc0b75c1510 100644 --- a/contrib/llvm/tools/lld/COFF/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// +#include "SymbolTable.h" #include "Config.h" #include "Driver.h" #include "Error.h" -#include "SymbolTable.h" +#include "Memory.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/legacy/LTOCodeGenerator.h" #include "llvm/Support/Debug.h" @@ -24,222 +24,265 @@ using namespace llvm; namespace lld { namespace coff { -void SymbolTable::addFile(std::unique_ptr<InputFile> FileP) { -#if LLVM_ENABLE_THREADS - std::launch Policy = std::launch::async; -#else - std::launch Policy = std::launch::deferred; -#endif +SymbolTable *Symtab; - InputFile *File = FileP.get(); - Files.push_back(std::move(FileP)); - if (auto *F = dyn_cast<ArchiveFile>(File)) { - ArchiveQueue.push_back( - std::async(Policy, [=]() { F->parse(); return F; })); - return; +void SymbolTable::addFile(InputFile *File) { + if (Config->Verbose) + outs() << "Reading " << toString(File) << "\n"; + File->parse(); + + MachineTypes MT = File->getMachineType(); + if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { + Config->Machine = MT; + } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) { + fatal(toString(File) + ": machine type " + machineToStr(MT) + + " conflicts with " + machineToStr(Config->Machine)); } - ObjectQueue.push_back( - std::async(Policy, [=]() { 
File->parse(); return File; })); + if (auto *F = dyn_cast<ObjectFile>(File)) { ObjectFiles.push_back(F); } else if (auto *F = dyn_cast<BitcodeFile>(File)) { BitcodeFiles.push_back(F); - } else { - ImportFiles.push_back(cast<ImportFile>(File)); + } else if (auto *F = dyn_cast<ImportFile>(File)) { + ImportFiles.push_back(F); } -} -void SymbolTable::step() { - if (queueEmpty()) + StringRef S = File->getDirectives(); + if (S.empty()) return; - readObjects(); - readArchives(); -} -void SymbolTable::run() { - while (!queueEmpty()) - step(); -} - -void SymbolTable::readArchives() { - if (ArchiveQueue.empty()) - return; - - // Add lazy symbols to the symbol table. Lazy symbols that conflict - // with existing undefined symbols are accumulated in LazySyms. - std::vector<Symbol *> LazySyms; - for (std::future<ArchiveFile *> &Future : ArchiveQueue) { - ArchiveFile *File = Future.get(); - if (Config->Verbose) - llvm::outs() << "Reading " << File->getShortName() << "\n"; - for (Lazy &Sym : File->getLazySymbols()) - addLazy(&Sym, &LazySyms); - } - ArchiveQueue.clear(); - - // Add archive member files to ObjectQueue that should resolve - // existing undefined symbols. - for (Symbol *Sym : LazySyms) - addMemberFile(cast<Lazy>(Sym->Body)); -} - -void SymbolTable::readObjects() { - if (ObjectQueue.empty()) - return; - - // Add defined and undefined symbols to the symbol table. - std::vector<StringRef> Directives; - for (size_t I = 0; I < ObjectQueue.size(); ++I) { - InputFile *File = ObjectQueue[I].get(); - if (Config->Verbose) - llvm::outs() << "Reading " << File->getShortName() << "\n"; - // Adding symbols may add more files to ObjectQueue - // (but not to ArchiveQueue). 
- for (SymbolBody *Sym : File->getSymbols()) - if (Sym->isExternal()) - addSymbol(Sym); - StringRef S = File->getDirectives(); - if (!S.empty()) { - Directives.push_back(S); - if (Config->Verbose) - llvm::outs() << "Directives: " << File->getShortName() - << ": " << S << "\n"; - } - } - ObjectQueue.clear(); - - // Parse directive sections. This may add files to - // ArchiveQueue and ObjectQueue. - for (StringRef S : Directives) - Driver->parseDirectives(S); -} - -bool SymbolTable::queueEmpty() { - return ArchiveQueue.empty() && ObjectQueue.empty(); + if (Config->Verbose) + outs() << "Directives: " << toString(File) << ": " << S << "\n"; + Driver->parseDirectives(S); } -void SymbolTable::reportRemainingUndefines(bool Resolve) { - llvm::SmallPtrSet<SymbolBody *, 8> Undefs; +void SymbolTable::reportRemainingUndefines() { + SmallPtrSet<SymbolBody *, 8> Undefs; for (auto &I : Symtab) { Symbol *Sym = I.second; - auto *Undef = dyn_cast<Undefined>(Sym->Body); + auto *Undef = dyn_cast<Undefined>(Sym->body()); if (!Undef) continue; + if (!Sym->IsUsedInRegularObj) + continue; StringRef Name = Undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *D = Undef->getWeakAlias()) { - if (Resolve) - Sym->Body = D; + // We resolve weak aliases by replacing the alias's SymbolBody with the + // target's SymbolBody. This causes all SymbolBody pointers referring to + // the old symbol to instead refer to the new symbol. However, we can't + // just blindly copy sizeof(Symbol::Body) bytes from D to Sym->Body + // because D may be an internal symbol, and internal symbols are stored as + // "unparented" SymbolBodies. For that reason we need to check which type + // of symbol we are dealing with and copy the correct number of bytes. + if (isa<DefinedRegular>(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedRegular)); + else if (isa<DefinedAbsolute>(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedAbsolute)); + else + // No other internal symbols are possible. 
+ Sym->Body = D->symbol()->Body; continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. if (Name.startswith("__imp_")) { Symbol *Imp = find(Name.substr(strlen("__imp_"))); - if (Imp && isa<Defined>(Imp->Body)) { - if (!Resolve) - continue; - auto *D = cast<Defined>(Imp->Body); - auto *S = new (Alloc) DefinedLocalImport(Name, D); - LocalImportChunks.push_back(S->getChunk()); - Sym->Body = S; + if (Imp && isa<Defined>(Imp->body())) { + auto *D = cast<Defined>(Imp->body()); + replaceBody<DefinedLocalImport>(Sym, Name, D); + LocalImportChunks.push_back( + cast<DefinedLocalImport>(Sym->body())->getChunk()); continue; } } // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. - if (Config->Force && Resolve) - Sym->Body = new (Alloc) DefinedAbsolute(Name, 0); - Undefs.insert(Sym->Body); + if (Config->Force) + replaceBody<DefinedAbsolute>(Sym, Name, 0); + Undefs.insert(Sym->body()); } if (Undefs.empty()) return; - for (Undefined *U : Config->GCRoot) - if (Undefs.count(U->repl())) - llvm::errs() << "<root>: undefined symbol: " << U->getName() << "\n"; - for (std::unique_ptr<InputFile> &File : Files) - if (!isa<ArchiveFile>(File.get())) - for (SymbolBody *Sym : File->getSymbols()) - if (Undefs.count(Sym->repl())) - llvm::errs() << File->getShortName() << ": undefined symbol: " - << Sym->getName() << "\n"; + for (SymbolBody *B : Config->GCRoot) + if (Undefs.count(B)) + errs() << "<root>: undefined symbol: " << B->getName() << "\n"; + for (ObjectFile *File : ObjectFiles) + for (SymbolBody *Sym : File->getSymbols()) + if (Undefs.count(Sym)) + errs() << toString(File) << ": undefined symbol: " << Sym->getName() + << "\n"; if (!Config->Force) fatal("link failed"); } -void SymbolTable::addLazy(Lazy *New, std::vector<Symbol *> *Accum) { - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - if 
(isa<Defined>(Existing)) - return; - if (Lazy *L = dyn_cast<Lazy>(Existing)) - if (L->getFileIndex() < New->getFileIndex()) - return; - Sym->Body = New; - New->setBackref(Sym); - if (isa<Undefined>(Existing)) - Accum->push_back(Sym); +std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { + Symbol *&Sym = Symtab[CachedHashStringRef(Name)]; + if (Sym) + return {Sym, false}; + Sym = make<Symbol>(); + Sym->IsUsedInRegularObj = false; + Sym->PendingArchiveLoad = false; + return {Sym, true}; } -void SymbolTable::addSymbol(SymbolBody *New) { - // Find an existing symbol or create and insert a new one. - assert(isa<Defined>(New) || isa<Undefined>(New)); - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - - // If we have an undefined symbol and a lazy symbol, - // let the lazy symbol to read a member file. - if (auto *L = dyn_cast<Lazy>(Existing)) { - // Undefined symbols with weak aliases need not to be resolved, - // since they would be replaced with weak aliases if they remain - // undefined. 
- if (auto *U = dyn_cast<Undefined>(New)) { - if (!U->WeakAlias) { - addMemberFile(L); - return; - } +Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, + bool IsWeakAlias) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (!F || !isa<BitcodeFile>(F)) + S->IsUsedInRegularObj = true; + if (WasInserted || (isa<Lazy>(S->body()) && IsWeakAlias)) { + replaceBody<Undefined>(S, Name); + return S; + } + if (auto *L = dyn_cast<Lazy>(S->body())) { + if (!S->PendingArchiveLoad) { + S->PendingArchiveLoad = true; + L->File->addMember(&L->Sym); } - Sym->Body = New; + } + return S; +} + +void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { + StringRef Name = Sym.getName(); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody<Lazy>(S, F, Sym); return; } + auto *U = dyn_cast<Undefined>(S->body()); + if (!U || U->WeakAlias || S->PendingArchiveLoad) + return; + S->PendingArchiveLoad = true; + F->addMember(&Sym); +} + +void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { + fatal("duplicate symbol: " + toString(*Existing->body()) + " in " + + toString(Existing->body()->getFile()) + " and in " + + (NewFile ? toString(NewFile) : "(internal)")); +} - // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, - // equivalent (conflicting), or more preferable, respectively. 
- int Comp = Existing->compare(New); - if (Comp == 0) - fatal("duplicate symbol: " + Existing->getDebugName() + " and " + - New->getDebugName()); - if (Comp < 0) - Sym->Body = New; +Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedAbsolute>(S, N, Sym); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; } -Symbol *SymbolTable::insert(SymbolBody *New) { - Symbol *&Sym = Symtab[New->getName()]; - if (Sym) { - New->setBackref(Sym); - return Sym; +Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedAbsolute>(S, N, VA); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRelative(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedRelative>(S, N, VA); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRegular(ObjectFile *F, COFFSymbolRef Sym, + SectionChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedRegular>(S, F, Sym, C); + else if (auto *R = dyn_cast<DefinedRegular>(S->body())) { + if (!C->isCOMDAT() || !R->isCOMDAT()) + reportDuplicate(S, F); + } else if (auto *B = dyn_cast<DefinedBitcode>(S->body())) { + 
if (B->IsReplaceable) + replaceBody<DefinedRegular>(S, F, Sym, C); + else if (!C->isCOMDAT()) + reportDuplicate(S, F); + } else + replaceBody<DefinedRegular>(S, F, Sym, C); + return S; +} + +Symbol *SymbolTable::addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) { + replaceBody<DefinedBitcode>(S, F, N, IsReplaceable); + return S; } - Sym = new (Alloc) Symbol(New); - New->setBackref(Sym); - return Sym; + if (isa<DefinedCommon>(S->body())) + return S; + if (IsReplaceable) + if (isa<DefinedRegular>(S->body()) || isa<DefinedBitcode>(S->body())) + return S; + reportDuplicate(S, F); + return S; } -// Reads an archive member file pointed by a given symbol. -void SymbolTable::addMemberFile(Lazy *Body) { - std::unique_ptr<InputFile> File = Body->getMember(); +Symbol *SymbolTable::addCommon(ObjectFile *F, COFFSymbolRef Sym, + CommonChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || !isa<DefinedCOFF>(S->body())) + replaceBody<DefinedCommon>(S, F, Sym, C); + else if (auto *DC = dyn_cast<DefinedCommon>(S->body())) + if (Sym.getValue() > DC->getSize()) + replaceBody<DefinedCommon>(S, F, Sym, C); + return S; +} - // getMember returns an empty buffer if the member was already - // read from the library. 
- if (!File) - return; - if (Config->Verbose) - llvm::outs() << "Loaded " << File->getShortName() << " for " - << Body->getName() << "\n"; - addFile(std::move(File)); +Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedImportData>(S, N, F); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, + uint16_t Machine) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body())) + replaceBody<DefinedImportThunk>(S, Name, ID, Machine); + else if (!isa<DefinedCOFF>(S->body())) + reportDuplicate(S, nullptr); + return S; } std::vector<Chunk *> SymbolTable::getChunks() { @@ -252,7 +295,7 @@ std::vector<Chunk *> SymbolTable::getChunks() { } Symbol *SymbolTable::find(StringRef Name) { - auto It = Symtab.find(Name); + auto It = Symtab.find(CachedHashStringRef(Name)); if (It == Symtab.end()) return nullptr; return It->second; @@ -266,7 +309,7 @@ Symbol *SymbolTable::findUnderscore(StringRef Name) { StringRef SymbolTable::findByPrefix(StringRef Prefix) { for (auto Pair : Symtab) { - StringRef Name = Pair.first; + StringRef Name = Pair.first.val(); if (Name.startswith(Prefix)) return Name; } @@ -275,7 +318,7 @@ StringRef SymbolTable::findByPrefix(StringRef Prefix) { StringRef SymbolTable::findMangle(StringRef Name) { if (Symbol *Sym = find(Name)) - if (!isa<Undefined>(Sym->Body)) + if (!isa<Undefined>(Sym->body())) return Name; if (Config->Machine != I386) return findByPrefix(("?" + Name + "@@Y").str()); @@ -289,39 +332,22 @@ StringRef SymbolTable::findMangle(StringRef Name) { return findByPrefix(("?" 
+ Name.substr(1) + "@@Y").str()); } -void SymbolTable::mangleMaybe(Undefined *U) { - if (U->WeakAlias) - return; - if (!isa<Undefined>(U->repl())) +void SymbolTable::mangleMaybe(SymbolBody *B) { + auto *U = dyn_cast<Undefined>(B); + if (!U || U->WeakAlias) return; StringRef Alias = findMangle(U->getName()); if (!Alias.empty()) U->WeakAlias = addUndefined(Alias); } -Undefined *SymbolTable::addUndefined(StringRef Name) { - auto *New = new (Alloc) Undefined(Name); - addSymbol(New); - if (auto *U = dyn_cast<Undefined>(New->repl())) - return U; - return New; -} - -DefinedRelative *SymbolTable::addRelative(StringRef Name, uint64_t VA) { - auto *New = new (Alloc) DefinedRelative(Name, VA); - addSymbol(New); - return New; -} - -DefinedAbsolute *SymbolTable::addAbsolute(StringRef Name, uint64_t VA) { - auto *New = new (Alloc) DefinedAbsolute(Name, VA); - addSymbol(New); - return New; +SymbolBody *SymbolTable::addUndefined(StringRef Name) { + return addUndefined(Name, nullptr, false)->body(); } void SymbolTable::printMap(llvm::raw_ostream &OS) { for (ObjectFile *File : ObjectFiles) { - OS << File->getShortName() << ":\n"; + OS << toString(File) << ":\n"; for (SymbolBody *Body : File->getSymbols()) if (auto *R = dyn_cast<DefinedRegular>(Body)) if (R->getChunk()->isLive()) @@ -330,84 +356,32 @@ void SymbolTable::printMap(llvm::raw_ostream &OS) { } } -void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) { - for (SymbolBody *Body : Obj->getSymbols()) { - if (!Body->isExternal()) - continue; - // We should not see any new undefined symbols at this point, but we'll - // diagnose them later in reportRemainingUndefines(). - StringRef Name = Body->getName(); - Symbol *Sym = insert(Body); - SymbolBody *Existing = Sym->Body; - - if (Existing == Body) - continue; - - if (isa<DefinedBitcode>(Existing)) { - Sym->Body = Body; - continue; - } - if (auto *L = dyn_cast<Lazy>(Existing)) { - // We may see new references to runtime library symbols such as __chkstk - // here. 
These symbols must be wholly defined in non-bitcode files. - addMemberFile(L); - continue; - } - - int Comp = Existing->compare(Body); - if (Comp == 0) - fatal("LTO: unexpected duplicate symbol: " + Name); - if (Comp < 0) - Sym->Body = Body; - } -} - void SymbolTable::addCombinedLTOObjects() { if (BitcodeFiles.empty()) return; - // Diagnose any undefined symbols early, but do not resolve weak externals, - // as resolution breaks the invariant that each Symbol points to a unique - // SymbolBody, which we rely on to replace DefinedBitcode symbols correctly. - reportRemainingUndefines(/*Resolve=*/false); - // Create an object file and add it to the symbol table by replacing any // DefinedBitcode symbols with the definitions in the object file. LTOCodeGenerator CG(BitcodeFile::Context); CG.setOptLevel(Config->LTOOptLevel); - std::vector<ObjectFile *> Objs = createLTOObjects(&CG); - - for (ObjectFile *Obj : Objs) - addCombinedLTOObject(Obj); - - size_t NumBitcodeFiles = BitcodeFiles.size(); - run(); - if (BitcodeFiles.size() != NumBitcodeFiles) - fatal("LTO: late loaded symbol created new bitcode reference"); + for (ObjectFile *Obj : createLTOObjects(&CG)) + Obj->parse(); } // Combine and compile bitcode files and then return the result // as a vector of regular COFF object files. std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { - // All symbols referenced by non-bitcode objects must be preserved. - for (ObjectFile *File : ObjectFiles) - for (SymbolBody *Body : File->getSymbols()) - if (auto *S = dyn_cast<DefinedBitcode>(Body->repl())) - CG->addMustPreserveSymbol(S->getName()); - - // Likewise for bitcode symbols which we initially resolved to non-bitcode. + // All symbols referenced by non-bitcode objects, including GC roots, must be + // preserved. We must also replace bitcode symbols with undefined symbols so + // that they may be replaced with real definitions without conflicting. 
for (BitcodeFile *File : BitcodeFiles) - for (SymbolBody *Body : File->getSymbols()) - if (isa<DefinedBitcode>(Body) && !isa<DefinedBitcode>(Body->repl())) + for (SymbolBody *Body : File->getSymbols()) { + if (!isa<DefinedBitcode>(Body)) + continue; + if (Body->symbol()->IsUsedInRegularObj) CG->addMustPreserveSymbol(Body->getName()); - - // Likewise for other symbols that must be preserved. - for (Undefined *U : Config->GCRoot) { - if (auto *S = dyn_cast<DefinedBitcode>(U->repl())) - CG->addMustPreserveSymbol(S->getName()); - else if (auto *S = dyn_cast_or_null<DefinedBitcode>(U->getWeakAlias())) - CG->addMustPreserveSymbol(S->getName()); - } + replaceBody<Undefined>(Body->symbol(), Body->getName()); + } CG->setModule(BitcodeFiles[0]->takeModule()); for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I) @@ -434,10 +408,8 @@ std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { std::vector<ObjectFile *> ObjFiles; for (SmallString<0> &Obj : Objs) { - auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "<LTO object>")); - Files.emplace_back(ObjFile); + auto *ObjFile = make<ObjectFile>(MemoryBufferRef(Obj, "<LTO object>")); ObjectFiles.push_back(ObjFile); - ObjFile->parse(); ObjFiles.push_back(ObjFile); } diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.h b/contrib/llvm/tools/lld/COFF/SymbolTable.h index 8bf4387cdfff..703821f2e124 100644 --- a/contrib/llvm/tools/lld/COFF/SymbolTable.h +++ b/contrib/llvm/tools/lld/COFF/SymbolTable.h @@ -11,18 +11,12 @@ #define LLD_COFF_SYMBOL_TABLE_H #include "InputFiles.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" -#ifdef _MSC_VER -// <future> depends on <eh.h> for __uncaught_exception. 
-#include <eh.h> -#endif - -#include <future> - namespace llvm { struct LTOCodeGenerator; } @@ -31,8 +25,12 @@ namespace lld { namespace coff { class Chunk; +class CommonChunk; class Defined; +class DefinedAbsolute; +class DefinedRelative; class Lazy; +class SectionChunk; class SymbolBody; struct Symbol; @@ -45,18 +43,17 @@ struct Symbol; // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition -// to replace the lazy symbol. The logic is implemented in resolve(). +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. +// There is one add* function per symbol type. class SymbolTable { public: - void addFile(std::unique_ptr<InputFile> File); - std::vector<std::unique_ptr<InputFile>> &getFiles() { return Files; } - void step(); - void run(); - bool queueEmpty(); + void addFile(InputFile *File); - // Print an error message on undefined symbols. If Resolve is true, try to - // resolve any undefined symbols and update the symbol table accordingly. - void reportRemainingUndefines(bool Resolve); + // Try to resolve any undefined symbols and update the symbol table + // accordingly, then print an error message for any remaining undefined + // symbols. + void reportRemainingUndefines(); // Returns a list of chunks of selected symbols. std::vector<Chunk *> getChunks(); @@ -69,7 +66,7 @@ public: // mangled symbol. This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. - void mangleMaybe(Undefined *U); + void mangleMaybe(SymbolBody *B); StringRef findMangle(StringRef Name); // Print a layout map to OS. @@ -88,37 +85,44 @@ public: std::vector<ObjectFile *> ObjectFiles; // Creates an Undefined symbol for a given name. 
- Undefined *addUndefined(StringRef Name); - DefinedRelative *addRelative(StringRef Name, uint64_t VA); - DefinedAbsolute *addAbsolute(StringRef Name, uint64_t VA); + SymbolBody *addUndefined(StringRef Name); + + Symbol *addRelative(StringRef N, uint64_t VA); + Symbol *addAbsolute(StringRef N, uint64_t VA); + + Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias); + void addLazy(ArchiveFile *F, const Archive::Symbol Sym); + Symbol *addAbsolute(StringRef N, COFFSymbolRef S); + Symbol *addRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C); + Symbol *addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable); + Symbol *addCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C); + Symbol *addImportData(StringRef N, ImportFile *F); + Symbol *addImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine); + + void reportDuplicate(Symbol *Existing, InputFile *NewFile); // A list of chunks which to be added to .rdata. std::vector<Chunk *> LocalImportChunks; private: - void readArchives(); + void readArchive(); void readObjects(); - void addSymbol(SymbolBody *New); - void addLazy(Lazy *New, std::vector<Symbol *> *Accum); - Symbol *insert(SymbolBody *New); + std::pair<Symbol *, bool> insert(StringRef Name); StringRef findByPrefix(StringRef Prefix); - void addMemberFile(Lazy *Body); void addCombinedLTOObject(ObjectFile *Obj); std::vector<ObjectFile *> createLTOObjects(llvm::LTOCodeGenerator *CG); - llvm::DenseMap<StringRef, Symbol *> Symtab; - - std::vector<std::unique_ptr<InputFile>> Files; - std::vector<std::future<ArchiveFile *>> ArchiveQueue; - std::vector<std::future<InputFile *>> ObjectQueue; + llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> Symtab; std::vector<BitcodeFile *> BitcodeFiles; std::vector<SmallString<0>> Objs; - llvm::BumpPtrAllocator Alloc; }; +extern SymbolTable *Symtab; + } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Symbols.cpp b/contrib/llvm/tools/lld/COFF/Symbols.cpp index 
6e2db6631ce7..6de85d581f49 100644 --- a/contrib/llvm/tools/lld/COFF/Symbols.cpp +++ b/contrib/llvm/tools/lld/COFF/Symbols.cpp @@ -7,16 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "Symbols.h" #include "Error.h" #include "InputFiles.h" -#include "Symbols.h" +#include "Memory.h" +#include "Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; using namespace llvm::object; -using llvm::sys::fs::identify_magic; -using llvm::sys::fs::file_magic; namespace lld { namespace coff { @@ -36,130 +37,14 @@ StringRef SymbolBody::getName() { return Name; } -// Returns 1, 0 or -1 if this symbol should take precedence -// over the Other, tie or lose, respectively. -int SymbolBody::compare(SymbolBody *Other) { - Kind LK = kind(), RK = Other->kind(); - - // Normalize so that the smaller kind is on the left. - if (LK > RK) - return -Other->compare(this); - - // First handle comparisons between two different kinds. - if (LK != RK) { - if (RK > LastDefinedKind) { - if (LK == LazyKind && cast<Undefined>(Other)->WeakAlias) - return -1; - - // The LHS is either defined or lazy and so it wins. - assert((LK <= LastDefinedKind || LK == LazyKind) && "Bad kind!"); - return 1; - } - - // Bitcode has special complexities. - if (RK == DefinedBitcodeKind) { - auto *RHS = cast<DefinedBitcode>(Other); - - switch (LK) { - case DefinedCommonKind: - return 1; - - case DefinedRegularKind: - // As an approximation, regular symbols win over bitcode symbols, - // but we definitely have a conflict if the regular symbol is not - // replaceable and neither is the bitcode symbol. We do not - // replicate the rest of the symbol resolution logic here; symbol - // resolution will be done accurately after lowering bitcode symbols - // to regular symbols in addCombinedLTOObject(). 
- if (cast<DefinedRegular>(this)->isCOMDAT() || RHS->IsReplaceable) - return 1; - - // Fallthrough to the default of a tie otherwise. - default: - return 0; - } - } - - // Either of the object file kind will trump a higher kind. - if (LK <= LastDefinedCOFFKind) - return 1; - - // The remaining kind pairs are ties amongst defined symbols. - return 0; - } - - // Now handle the case where the kinds are the same. - switch (LK) { - case DefinedRegularKind: { - auto *LHS = cast<DefinedRegular>(this); - auto *RHS = cast<DefinedRegular>(Other); - if (LHS->isCOMDAT() && RHS->isCOMDAT()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return 0; - } - - case DefinedCommonKind: { - auto *LHS = cast<DefinedCommon>(this); - auto *RHS = cast<DefinedCommon>(Other); - if (LHS->getSize() == RHS->getSize()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return LHS->getSize() > RHS->getSize() ? 1 : -1; - } - - case DefinedBitcodeKind: { - auto *LHS = cast<DefinedBitcode>(this); - auto *RHS = cast<DefinedBitcode>(Other); - // If both are non-replaceable, we have a tie. - if (!LHS->IsReplaceable && !RHS->IsReplaceable) - return 0; - - // Non-replaceable symbols win, but even two replaceable symboles don't - // tie. If both symbols are replaceable, choice is arbitrary. - if (RHS->IsReplaceable && LHS->IsReplaceable) - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - return LHS->IsReplaceable ? -1 : 1; - } - - case LazyKind: { - // Don't tie, pick the earliest. - auto *LHS = cast<Lazy>(this); - auto *RHS = cast<Lazy>(Other); - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - } - - case UndefinedKind: { - auto *LHS = cast<Undefined>(this); - auto *RHS = cast<Undefined>(Other); - // Tie if both undefined symbols have different weak aliases. - if (LHS->WeakAlias && RHS->WeakAlias) { - if (LHS->WeakAlias->getName() != RHS->WeakAlias->getName()) - return 0; - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - } - return LHS->WeakAlias ? 
1 : -1; - } - - case DefinedLocalImportKind: - case DefinedImportThunkKind: - case DefinedImportDataKind: - case DefinedAbsoluteKind: - case DefinedRelativeKind: - // These all simply tie. - return 0; - } - llvm_unreachable("unknown symbol kind"); -} - -std::string SymbolBody::getDebugName() { - std::string N = getName().str(); - if (auto *D = dyn_cast<DefinedCOFF>(this)) { - N += " "; - N += D->File->getShortName(); - } else if (auto *D = dyn_cast<DefinedBitcode>(this)) { - N += " "; - N += D->File->getShortName(); - } - return N; +InputFile *SymbolBody::getFile() { + if (auto *Sym = dyn_cast<DefinedCOFF>(this)) + return Sym->File; + if (auto *Sym = dyn_cast<DefinedBitcode>(this)) + return Sym->File; + if (auto *Sym = dyn_cast<Lazy>(this)) + return Sym->File; + return nullptr; } COFFSymbolRef DefinedCOFF::getCOFFSymbol() { @@ -174,44 +59,27 @@ DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine) : Defined(DefinedImportThunkKind, Name) { switch (Machine) { - case AMD64: Data.reset(new ImportThunkChunkX64(S)); return; - case I386: Data.reset(new ImportThunkChunkX86(S)); return; - case ARMNT: Data.reset(new ImportThunkChunkARM(S)); return; + case AMD64: Data = make<ImportThunkChunkX64>(S); return; + case I386: Data = make<ImportThunkChunkX86>(S); return; + case ARMNT: Data = make<ImportThunkChunkARM>(S); return; default: llvm_unreachable("unknown machine type"); } } -std::unique_ptr<InputFile> Lazy::getMember() { - MemoryBufferRef MBRef = File->getMember(&Sym); - - // getMember returns an empty buffer if the member was already - // read from the library. 
- if (MBRef.getBuffer().empty()) - return std::unique_ptr<InputFile>(nullptr); - - file_magic Magic = identify_magic(MBRef.getBuffer()); - if (Magic == file_magic::coff_import_library) - return std::unique_ptr<InputFile>(new ImportFile(MBRef)); - - std::unique_ptr<InputFile> Obj; - if (Magic == file_magic::coff_object) - Obj.reset(new ObjectFile(MBRef)); - else if (Magic == file_magic::bitcode) - Obj.reset(new BitcodeFile(MBRef)); - else - fatal("unknown file type: " + File->getName()); - - Obj->setParentName(File->getName()); - return Obj; -} - Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. for (SymbolBody *A = WeakAlias; A; A = cast<Undefined>(A)->WeakAlias) - if (auto *D = dyn_cast<Defined>(A->repl())) + if (auto *D = dyn_cast<Defined>(A)) return D; return nullptr; } +// Returns a symbol name for an error message. +std::string toString(SymbolBody &B) { + if (Optional<std::string> S = demangle(B.getName())) + return ("\"" + *S + "\" (" + B.getName() + ")").str(); + return B.getName(); +} + } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Symbols.h b/contrib/llvm/tools/lld/COFF/Symbols.h index f96c1fb3cc1d..bc9ad4aa8aff 100644 --- a/contrib/llvm/tools/lld/COFF/Symbols.h +++ b/contrib/llvm/tools/lld/COFF/Symbols.h @@ -12,6 +12,7 @@ #include "Chunks.h" #include "Config.h" +#include "Memory.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Archive.h" @@ -32,15 +33,8 @@ class ArchiveFile; class BitcodeFile; class InputFile; class ObjectFile; -class SymbolBody; - -// A real symbol object, SymbolBody, is usually accessed indirectly -// through a Symbol. There's always one Symbol for each symbol name. -// The resolver updates SymbolBody pointers as it resolves symbols. -struct Symbol { - explicit Symbol(SymbolBody *P) : Body(P) {} - SymbolBody *Body; -}; +struct Symbol; +class SymbolTable; // The base class for real symbol classes. 
class SymbolBody { @@ -75,28 +69,19 @@ public: // Returns the symbol name. StringRef getName(); - // A SymbolBody has a backreference to a Symbol. Originally they are - // doubly-linked. A backreference will never change. But the pointer - // in the Symbol may be mutated by the resolver. If you have a - // pointer P to a SymbolBody and are not sure whether the resolver - // has chosen the object among other objects having the same name, - // you can access P->Backref->Body to get the resolver's result. - void setBackref(Symbol *P) { Backref = P; } - SymbolBody *repl() { return Backref ? Backref->Body : this; } - - // Decides which symbol should "win" in the symbol table, this or - // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if - // they are duplicate (conflicting) symbols. - int compare(SymbolBody *Other); + // Returns the file from which this symbol was created. + InputFile *getFile(); - // Returns a name of this symbol including source file name. - // Used only for debugging and logging. - std::string getDebugName(); + Symbol *symbol(); + const Symbol *symbol() const { + return const_cast<SymbolBody *>(this)->symbol(); + } protected: + friend SymbolTable; explicit SymbolBody(Kind K, StringRef N = "") : SymbolKind(K), IsExternal(true), IsCOMDAT(false), - IsReplaceable(false), Name(N) {} + IsReplaceable(false), WrittenToSymtab(false), Name(N) {} const unsigned SymbolKind : 8; unsigned IsExternal : 1; @@ -107,8 +92,12 @@ protected: // This bit is used by the \c DefinedBitcode subclass. unsigned IsReplaceable : 1; +public: + // This bit is used by Writer::createSymbolAndStringTable(). 
+ unsigned WrittenToSymtab : 1; + +protected: StringRef Name; - Symbol *Backref = nullptr; }; // The base class for any defined symbols, including absolute symbols, @@ -149,12 +138,13 @@ public: return S->kind() <= LastDefinedCOFFKind; } - int getFileIndex() { return File->Index; } + ObjectFile *getFile() { return File; } COFFSymbolRef getCOFFSymbol(); -protected: ObjectFile *File; + +protected: const coff_symbol_generic *Sym; }; @@ -194,7 +184,7 @@ public: uint64_t getRVA() { return Data->getRVA(); } private: - friend SymbolBody; + friend SymbolTable; uint64_t getSize() { return Sym->Value; } CommonChunk *Data; }; @@ -253,14 +243,12 @@ public: static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } - // Returns an object file for this symbol, or a nullptr if the file - // was already returned. - std::unique_ptr<InputFile> getMember(); + ArchiveFile *File; - int getFileIndex() { return File->Index; } +private: + friend SymbolTable; private: - ArchiveFile *File; const Archive::Symbol Sym; }; @@ -293,26 +281,22 @@ public: // table in an output. The former has "__imp_" prefix. 
class DefinedImportData : public Defined { public: - DefinedImportData(StringRef D, StringRef N, StringRef E, - const coff_import_header *H) - : Defined(DefinedImportDataKind, N), DLLName(D), ExternalName(E), Hdr(H) { + DefinedImportData(StringRef N, ImportFile *F) + : Defined(DefinedImportDataKind, N), File(F) { } static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } - uint64_t getRVA() { return Location->getRVA(); } - StringRef getDLLName() { return DLLName; } - StringRef getExternalName() { return ExternalName; } - void setLocation(Chunk *AddressTable) { Location = AddressTable; } - uint16_t getOrdinal() { return Hdr->OrdinalHint; } + uint64_t getRVA() { return File->Location->getRVA(); } + StringRef getDLLName() { return File->DLLName; } + StringRef getExternalName() { return File->ExternalName; } + void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } + uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } private: - StringRef DLLName; - StringRef ExternalName; - const coff_import_header *Hdr; - Chunk *Location = nullptr; + ImportFile *File; }; // This class represents a symbol for a jump table entry which jumps @@ -329,10 +313,10 @@ public: } uint64_t getRVA() { return Data->getRVA(); } - Chunk *getChunk() { return Data.get(); } + Chunk *getChunk() { return Data; } private: - std::unique_ptr<Chunk> Data; + Chunk *Data; }; // If you have a symbol "__imp_foo" in your object file, a symbol name @@ -343,17 +327,17 @@ private: class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) - : Defined(DefinedLocalImportKind, N), Data(S) {} + : Defined(DefinedLocalImportKind, N), Data(make<LocalImportChunk>(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; } - uint64_t getRVA() { return Data.getRVA(); } - Chunk *getChunk() { return &Data; } + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data; } private: - 
LocalImportChunk Data; + LocalImportChunk *Data; }; class DefinedBitcode : public Defined { @@ -361,6 +345,11 @@ class DefinedBitcode : public Defined { public: DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) : Defined(DefinedBitcodeKind, N), File(F) { + // IsReplaceable tracks whether the bitcode symbol may be replaced with some + // other (defined, common or bitcode) symbol. This is the case for common, + // comdat and weak external symbols. We try to replace bitcode symbols with + // "real" symbols (see SymbolTable::add{Regular,Bitcode}), and resolve the + // result against the real symbol from the combined LTO object. this->IsReplaceable = IsReplaceable; } @@ -368,7 +357,6 @@ public: return S->kind() == DefinedBitcodeKind; } -private: BitcodeFile *File; }; @@ -397,6 +385,52 @@ inline uint64_t Defined::getRVA() { llvm_unreachable("unknown symbol kind"); } +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + // True if this symbol was referenced by a regular (non-bitcode) object. + unsigned IsUsedInRegularObj : 1; + + // True if we've seen both a lazy and an undefined symbol with this symbol + // name, which means that we have enqueued an archive member load and should + // not load any more archive members to resolve the same symbol. + unsigned PendingArchiveLoad : 1; + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. 
+ llvm::AlignedCharArrayUnion<DefinedRegular, DefinedCommon, DefinedAbsolute, + DefinedRelative, Lazy, Undefined, + DefinedImportData, DefinedImportThunk, + DefinedLocalImport, DefinedBitcode> + Body; + + SymbolBody *body() { + return reinterpret_cast<SymbolBody *>(Body.buffer); + } + const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); } +}; + +template <typename T, typename... ArgT> +void replaceBody(Symbol *S, ArgT &&... Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(alignof(T) <= alignof(decltype(S->Body)), + "Body not aligned enough"); + assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr && + "Not a SymbolBody"); + new (S->Body.buffer) T(std::forward<ArgT>(Arg)...); +} + +inline Symbol *SymbolBody::symbol() { + assert(isExternal()); + return reinterpret_cast<Symbol *>(reinterpret_cast<char *>(this) - + offsetof(Symbol, Body)); +} + +std::string toString(SymbolBody &B); + } // namespace coff } // namespace lld diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp index d8077df95701..3e69aebbb424 100644 --- a/contrib/llvm/tools/lld/COFF/Writer.cpp +++ b/contrib/llvm/tools/lld/COFF/Writer.cpp @@ -7,13 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "Writer.h" #include "Config.h" #include "DLL.h" #include "Error.h" #include "InputFiles.h" +#include "Memory.h" +#include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" -#include "Writer.h" #include "lld/Core/Parallel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -21,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdio> @@ -42,6 +45,61 @@ static const int DOSStubSize = 64; static const int NumberfOfDataDirectory = 16; namespace 
{ + +class DebugDirectoryChunk : public Chunk { +public: + DebugDirectoryChunk(const std::vector<std::unique_ptr<Chunk>> &R) + : Records(R) {} + + size_t getSize() const override { + return Records.size() * sizeof(debug_directory); + } + + void writeTo(uint8_t *B) const override { + auto *D = reinterpret_cast<debug_directory *>(B + OutputSectionOff); + + for (const std::unique_ptr<Chunk> &Record : Records) { + D->Characteristics = 0; + D->TimeDateStamp = 0; + D->MajorVersion = 0; + D->MinorVersion = 0; + D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW; + D->SizeOfData = Record->getSize(); + D->AddressOfRawData = Record->getRVA(); + // TODO(compnerd) get the file offset + D->PointerToRawData = 0; + + ++D; + } + } + +private: + const std::vector<std::unique_ptr<Chunk>> &Records; +}; + +class CVDebugRecordChunk : public Chunk { + size_t getSize() const override { + return sizeof(codeview::DebugInfo) + Config->PDBPath.size() + 1; + } + + void writeTo(uint8_t *B) const override { + // Save off the DebugInfo entry to backfill the file signature (build id) + // in Writer::writeBuildId + DI = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff); + + DI->Signature.CVSignature = OMF::Signature::PDB70; + + // variable sized field (PDB Path) + auto *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*DI)); + if (!Config->PDBPath.empty()) + memcpy(P, Config->PDBPath.data(), Config->PDBPath.size()); + P[Config->PDBPath.size()] = '\0'; + } + +public: + mutable codeview::DebugInfo *DI = nullptr; +}; + // The writer writes a SymbolTable result to a file. 
class Writer { public: @@ -62,6 +120,7 @@ private: void setSectionPermissions(); void writeSections(); void sortExceptionTable(); + void writeBuildId(); void applyRelocations(); llvm::Optional<coff_symbol16> createSymbol(Defined *D); @@ -76,9 +135,7 @@ private: std::map<StringRef, std::vector<DefinedImportData *>> binImports(); SymbolTable *Symtab; - std::unique_ptr<llvm::FileOutputBuffer> Buffer; - llvm::SpecificBumpPtrAllocator<OutputSection> CAlloc; - llvm::SpecificBumpPtrAllocator<BaserelChunk> BAlloc; + std::unique_ptr<FileOutputBuffer> Buffer; std::vector<OutputSection *> OutputSections; std::vector<char> Strtab; std::vector<llvm::object::coff_symbol16> OutputSymtab; @@ -87,6 +144,11 @@ private: EdataContents Edata; std::unique_ptr<SEHTableChunk> SEHTable; + std::unique_ptr<Chunk> DebugDirectory; + std::vector<std::unique_ptr<Chunk>> DebugRecords; + CVDebugRecordChunk *BuildId = nullptr; + ArrayRef<uint8_t> SectionTable; + uint64_t FileSize; uint32_t PointerToSymbolTable = 0; uint64_t SizeOfImage; @@ -239,6 +301,11 @@ void Writer::run() { fixSafeSEHSymbols(); writeSections(); sortExceptionTable(); + writeBuildId(); + + if (!Config->PDBPath.empty()) + createPDB(Config->PDBPath, Symtab, SectionTable); + if (auto EC = Buffer->commit()) fatal(EC, "failed to write the output file"); } @@ -274,7 +341,7 @@ void Writer::createSections() { StringRef Name = getOutputSection(Pair.first); OutputSection *&Sec = Sections[Name]; if (!Sec) { - Sec = new (CAlloc.Allocate()) OutputSection(Name); + Sec = make<OutputSection>(Name); OutputSections.push_back(Sec); } std::vector<Chunk *> &Chunks = Pair.second; @@ -286,25 +353,46 @@ void Writer::createSections() { } void Writer::createMiscChunks() { + OutputSection *RData = createSection(".rdata"); + // Create thunks for locally-dllimported symbols. 
if (!Symtab->LocalImportChunks.empty()) { - OutputSection *Sec = createSection(".rdata"); for (Chunk *C : Symtab->LocalImportChunks) - Sec->addChunk(C); + RData->addChunk(C); + } + + // Create Debug Information Chunks + if (Config->Debug) { + DebugDirectory = llvm::make_unique<DebugDirectoryChunk>(DebugRecords); + + // TODO(compnerd) create a coffgrp entry if DebugType::CV is not enabled + if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV)) { + auto Chunk = llvm::make_unique<CVDebugRecordChunk>(); + + BuildId = Chunk.get(); + DebugRecords.push_back(std::move(Chunk)); + } + + RData->addChunk(DebugDirectory.get()); + for (const std::unique_ptr<Chunk> &C : DebugRecords) + RData->addChunk(C.get()); } // Create SEH table. x86-only. if (Config->Machine != I386) return; + std::set<Defined *> Handlers; + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { if (!File->SEHCompat) return; for (SymbolBody *B : File->SEHandlers) - Handlers.insert(cast<Defined>(B->repl())); + Handlers.insert(cast<Defined>(B)); } + SEHTable.reset(new SEHTableChunk(Handlers)); - createSection(".rdata")->addChunk(SEHTable.get()); + RData->addChunk(SEHTable.get()); } // Create .idata section for the DLL-imported symbol table. @@ -340,7 +428,7 @@ void Writer::createImportTables() { Sec->addChunk(C); } if (!DelayIdata.empty()) { - Defined *Helper = cast<Defined>(Config->DelayLoadHelper->repl()); + Defined *Helper = cast<Defined>(Config->DelayLoadHelper); DelayIdata.create(Helper); OutputSection *Sec = createSection(".didat"); for (Chunk *C : DelayIdata.getChunks()) @@ -383,6 +471,10 @@ size_t Writer::addEntryToStringTable(StringRef Str) { } Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { + // Relative symbols are unrepresentable in a COFF symbol table. 
+ if (isa<DefinedRelative>(Def)) + return None; + if (auto *D = dyn_cast<DefinedRegular>(Def)) if (!D->getChunk()->isLive()) return None; @@ -409,7 +501,6 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { switch (Def->kind()) { case SymbolBody::DefinedAbsoluteKind: - case SymbolBody::DefinedRelativeKind: Sym.Value = Def->getRVA(); Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; @@ -445,13 +536,11 @@ void Writer::createSymbolAndStringTable() { for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) for (SymbolBody *B : File->getSymbols()) if (auto *D = dyn_cast<Defined>(B)) - if (Optional<coff_symbol16> Sym = createSymbol(D)) - OutputSymtab.push_back(*Sym); - - for (ImportFile *File : Symtab->ImportFiles) - for (SymbolBody *B : File->getSymbols()) - if (Optional<coff_symbol16> Sym = createSymbol(cast<Defined>(B))) - OutputSymtab.push_back(*Sym); + if (!D->WrittenToSymtab) { + D->WrittenToSymtab = true; + if (Optional<coff_symbol16> Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); + } OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. @@ -542,7 +631,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() { PE->SizeOfImage = SizeOfImage; PE->SizeOfHeaders = SizeOfHeaders; if (!Config->NoEntry) { - Defined *Entry = cast<Defined>(Config->Entry->repl()); + Defined *Entry = cast<Defined>(Config->Entry); PE->AddressOfEntryPoint = Entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. 
if (Config->Machine == ARMNT) @@ -584,33 +673,32 @@ template <typename PEHeaderTy> void Writer::writeHeader() { Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); Dir[IAT].Size = Idata.getIATSize(); } - if (!DelayIdata.empty()) { - Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = - DelayIdata.getDirRVA(); - Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); - } if (OutputSection *Sec = findSection(".rsrc")) { Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize(); } - if (OutputSection *Sec = findSection(".reloc")) { - Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); - Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); - } if (OutputSection *Sec = findSection(".pdata")) { Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); } + if (OutputSection *Sec = findSection(".reloc")) { + Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); + } if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { - if (Defined *B = dyn_cast<Defined>(Sym->Body)) { + if (Defined *B = dyn_cast<Defined>(Sym->body())) { Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); Dir[TLS_TABLE].Size = Config->is64() ? 
sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } + if (Config->Debug) { + Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA(); + Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize(); + } if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { - if (auto *B = dyn_cast<DefinedRegular>(Sym->Body)) { + if (auto *B = dyn_cast<DefinedRegular>(Sym->body())) { SectionChunk *SC = B->getChunk(); assert(B->getRVA() >= SC->getRVA()); uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); @@ -626,12 +714,19 @@ template <typename PEHeaderTy> void Writer::writeHeader() { Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize; } } + if (!DelayIdata.empty()) { + Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = + DelayIdata.getDirRVA(); + Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); + } // Write section table for (OutputSection *Sec : OutputSections) { Sec->writeHeaderTo(Buf); Buf += sizeof(coff_section); } + SectionTable = ArrayRef<uint8_t>( + Buf - OutputSections.size() * sizeof(coff_section), Buf); if (OutputSymtab.empty()) return; @@ -660,8 +755,10 @@ void Writer::openFile(StringRef Path) { void Writer::fixSafeSEHSymbols() { if (!SEHTable) return; - Config->SEHTable->setRVA(SEHTable->getRVA()); - Config->SEHCount->setVA(SEHTable->getSize() / 4); + if (auto *T = dyn_cast<DefinedRelative>(Config->SEHTable->body())) + T->setRVA(SEHTable->getRVA()); + if (auto *C = dyn_cast<DefinedAbsolute>(Config->SEHCount->body())) + C->setVA(SEHTable->getSize() / 4); } // Handles /section options to allow users to overwrite @@ -715,6 +812,30 @@ void Writer::sortExceptionTable() { errs() << "warning: don't know how to handle .pdata.\n"; } +// Backfill the CVSignature in a PDB70 Debug Record. This backfilling allows us +// to get reproducible builds. +void Writer::writeBuildId() { + // There is nothing to backfill if BuildId was not setup. 
+ if (BuildId == nullptr) + return; + + MD5 Hash; + MD5::MD5Result Res; + + Hash.update(ArrayRef<uint8_t>{Buffer->getBufferStart(), + Buffer->getBufferEnd()}); + Hash.final(Res); + + assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 && + "only PDB 7.0 is supported"); + assert(sizeof(Res) == sizeof(BuildId->DI->PDB70.Signature) && + "signature size mismatch"); + memcpy(BuildId->DI->PDB70.Signature, Res, + sizeof(codeview::PDB70DebugInfo::Signature)); + // TODO(compnerd) track the Age + BuildId->DI->PDB70.Age = 1; +} + OutputSection *Writer::findSection(StringRef Name) { for (OutputSection *Sec : OutputSections) if (Sec->getName() == Name) @@ -744,16 +865,13 @@ OutputSection *Writer::createSection(StringRef Name) { uint32_t Perms = StringSwitch<uint32_t>(Name) .Case(".bss", BSS | R | W) .Case(".data", DATA | R | W) - .Case(".didat", DATA | R) - .Case(".edata", DATA | R) - .Case(".idata", DATA | R) - .Case(".rdata", DATA | R) + .Cases(".didat", ".edata", ".idata", ".rdata", DATA | R) .Case(".reloc", DATA | DISCARDABLE | R) .Case(".text", CODE | R | X) .Default(0); if (!Perms) llvm_unreachable("unknown section name"); - auto Sec = new (CAlloc.Allocate()) OutputSection(Name); + auto Sec = make<OutputSection>(Name); Sec->addPermissions(Perms); OutputSections.push_back(Sec); return Sec; @@ -784,13 +902,11 @@ void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) { uint32_t P = V[J].RVA & Mask; if (P == Page) continue; - BaserelChunk *Buf = BAlloc.Allocate(); - Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); + Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); I = J; Page = P; } if (I == J) return; - BaserelChunk *Buf = BAlloc.Allocate(); - Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); + Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); } diff --git a/contrib/llvm/tools/lld/COFF/Writer.h b/contrib/llvm/tools/lld/COFF/Writer.h index 0473315ae50a..0d26090177d8 100644 --- 
a/contrib/llvm/tools/lld/COFF/Writer.h +++ b/contrib/llvm/tools/lld/COFF/Writer.h @@ -14,9 +14,7 @@ namespace lld { namespace coff { - -class Chunk; -class OutputSection; +class SymbolTable; void writeResult(SymbolTable *T); diff --git a/contrib/llvm/tools/lld/ELF/CMakeLists.txt b/contrib/llvm/tools/lld/ELF/CMakeLists.txt index a1b65adc7400..2e9d2b941fd9 100644 --- a/contrib/llvm/tools/lld/ELF/CMakeLists.txt +++ b/contrib/llvm/tools/lld/ELF/CMakeLists.txt @@ -2,24 +2,30 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + add_lld_library(lldELF Driver.cpp DriverUtils.cpp EhFrame.cpp Error.cpp + GdbIndex.cpp ICF.cpp InputFiles.cpp InputSection.cpp LTO.cpp LinkerScript.cpp MarkLive.cpp + Mips.cpp OutputSections.cpp Relocations.cpp ScriptParser.cpp Strings.cpp - SymbolListFile.cpp SymbolTable.cpp Symbols.cpp + SyntheticSections.cpp Target.cpp Thunks.cpp Writer.cpp @@ -31,6 +37,8 @@ add_lld_library(lldELF BitWriter Codegen Core + DebugInfoDWARF + Demangle IPO Linker LTO @@ -44,7 +52,10 @@ add_lld_library(lldELF LINK_LIBS lldConfig + lldCore ${PTHREAD_LIB} - ) -add_dependencies(lldELF intrinsics_gen ELFOptionsTableGen) + DEPENDS + ELFOptionsTableGen + ${tablegen_deps} + ) diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h index 2ccd95e88775..b828cdb25047 100644 --- a/contrib/llvm/tools/lld/ELF/Config.h +++ b/contrib/llvm/tools/lld/ELF/Config.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/ELF.h" #include <vector> @@ -30,21 +31,36 @@ enum ELFKind { ELF64BEKind }; -enum class BuildIdKind { None, Fnv1, Md5, Sha1, Hexstring }; +// For --build-id. 
+enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid }; -enum class UnresolvedPolicy { NoUndef, Error, Warn, Ignore }; +// For --discard-{all,locals,none} and --retain-symbols-file. +enum class DiscardPolicy { Default, All, Locals, RetainFile, None }; + +// For --strip-{all,debug}. +enum class StripPolicy { None, All, Debug }; + +// For --unresolved-symbols. +enum class UnresolvedPolicy { NoUndef, ReportError, Warn, Ignore }; + +// For --sort-section and linkerscript sorting rules. +enum class SortSectionPolicy { Default, None, Alignment, Name, Priority }; + +// For --target2 +enum class Target2Policy { Abs, Rel, GotRel }; struct SymbolVersion { llvm::StringRef Name; bool IsExternCpp; + bool HasWildcard; }; // This struct contains symbols version definition that // can be found in version script if it is used for link. struct VersionDefinition { - VersionDefinition(llvm::StringRef Name, size_t Id) : Name(Name), Id(Id) {} + VersionDefinition(llvm::StringRef Name, uint16_t Id) : Name(Name), Id(Id) {} llvm::StringRef Name; - size_t Id; + uint16_t Id; std::vector<SymbolVersion> Globals; size_t NameOff; // Offset in string table. }; @@ -54,75 +70,92 @@ struct VersionDefinition { // and such fields have the same name as the corresponding options. // Most fields are initialized by the driver. 
struct Configuration { - Symbol *EntrySym = nullptr; InputFile *FirstElf = nullptr; + uint8_t OSABI = 0; + llvm::StringMap<uint64_t> SectionStartMap; llvm::StringRef DynamicLinker; llvm::StringRef Entry; llvm::StringRef Emulation; llvm::StringRef Fini; llvm::StringRef Init; - llvm::StringRef LtoAAPipeline; - llvm::StringRef LtoNewPmPasses; + llvm::StringRef LTOAAPipeline; + llvm::StringRef LTONewPmPasses; llvm::StringRef OutputFile; llvm::StringRef SoName; llvm::StringRef Sysroot; + llvm::StringSet<> RetainSymbolsFile; std::string RPath; std::vector<VersionDefinition> VersionDefinitions; - std::vector<llvm::StringRef> DynamicList; + std::vector<llvm::StringRef> AuxiliaryList; std::vector<llvm::StringRef> SearchPaths; + std::vector<llvm::StringRef> SymbolOrderingFile; std::vector<llvm::StringRef> Undefined; std::vector<SymbolVersion> VersionScriptGlobals; + std::vector<SymbolVersion> VersionScriptLocals; std::vector<uint8_t> BuildIdVector; bool AllowMultipleDefinition; bool AsNeeded = false; bool Bsymbolic; bool BsymbolicFunctions; + bool ColorDiagnostics = false; bool Demangle = true; bool DisableVerify; - bool DiscardAll; - bool DiscardLocals; - bool DiscardNone; bool EhFrameHdr; bool EnableNewDtags; bool ExportDynamic; bool FatalWarnings; bool GcSections; + bool GdbIndex; bool GnuHash = false; bool ICF; bool Mips64EL = false; + bool MipsN32Abi = false; bool NoGnuUnique; bool NoUndefinedVersion; + bool Nostdlib; + bool OFormatBinary; + bool OMagic; bool Pic; bool Pie; bool PrintGcSections; bool Rela; bool Relocatable; bool SaveTemps; + bool SingleRoRx; bool Shared; bool Static = false; - bool StripAll; - bool StripDebug; bool SysvHash = true; + bool Target1Rel; bool Threads; bool Trace; bool Verbose; bool WarnCommon; + bool WarnMissingEntry; bool ZCombreloc; - bool ZExecStack; + bool ZExecstack; bool ZNodelete; bool ZNow; bool ZOrigin; bool ZRelro; + bool ExitEarly; + bool ZWxneeded; + DiscardPolicy Discard; + SortSectionPolicy SortSection; + StripPolicy Strip = 
StripPolicy::None; UnresolvedPolicy UnresolvedSymbols; + Target2Policy Target2 = Target2Policy::GotRel; BuildIdKind BuildId = BuildIdKind::None; ELFKind EKind = ELFNoneKind; uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL; uint16_t EMachine = llvm::ELF::EM_NONE; - uint64_t EntryAddr = -1; + uint64_t ErrorLimit = 20; uint64_t ImageBase; - unsigned LtoJobs; - unsigned LtoO; + uint64_t MaxPageSize; + uint64_t ZStackSize; + unsigned LTOPartitions; + unsigned LTOO; unsigned Optimize; + unsigned ThinLTOJobs; }; // The only instance of Configuration struct. diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp index c6ca2639236f..a11dbc7cc47f 100644 --- a/contrib/llvm/tools/lld/ELF/Driver.cpp +++ b/contrib/llvm/tools/lld/ELF/Driver.cpp @@ -14,14 +14,17 @@ #include "InputFiles.h" #include "InputSection.h" #include "LinkerScript.h" +#include "Memory.h" #include "Strings.h" -#include "SymbolListFile.h" #include "SymbolTable.h" #include "Target.h" +#include "Threads.h" #include "Writer.h" +#include "lld/Config/Version.h" #include "lld/Driver/Driver.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> @@ -38,48 +41,59 @@ using namespace lld::elf; Configuration *elf::Config; LinkerDriver *elf::Driver; -bool elf::link(ArrayRef<const char *> Args, raw_ostream &Error) { - HasError = false; +BumpPtrAllocator elf::BAlloc; +StringSaver elf::Saver{BAlloc}; +std::vector<SpecificAllocBase *> elf::SpecificAllocBase::Instances; + +bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly, + raw_ostream &Error) { + ErrorCount = 0; ErrorOS = &Error; + Argv0 = Args[0]; - Configuration C; - LinkerDriver D; - ScriptConfiguration SC; - Config = &C; - Driver = &D; - ScriptConfig = &SC; + Config = make<Configuration>(); + Driver = make<LinkerDriver>(); + ScriptConfig = 
make<ScriptConfiguration>(); - Driver->main(Args); - return !HasError; + Driver->main(Args, CanExitEarly); + freeArena(); + return !ErrorCount; } // Parses a linker -m option. -static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) { - if (S.endswith("_fbsd")) +static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) { + uint8_t OSABI = 0; + StringRef S = Emul; + if (S.endswith("_fbsd")) { S = S.drop_back(5); + OSABI = ELFOSABI_FREEBSD; + } std::pair<ELFKind, uint16_t> Ret = StringSwitch<std::pair<ELFKind, uint16_t>>(S) - .Case("aarch64linux", {ELF64LEKind, EM_AARCH64}) + .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64}) .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM}) .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64}) .Case("elf32btsmip", {ELF32BEKind, EM_MIPS}) .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS}) + .Case("elf32btsmipn32", {ELF32BEKind, EM_MIPS}) + .Case("elf32ltsmipn32", {ELF32LEKind, EM_MIPS}) .Case("elf32ppc", {ELF32BEKind, EM_PPC}) .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) .Case("elf64ppc", {ELF64BEKind, EM_PPC64}) + .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64}) .Case("elf_i386", {ELF32LEKind, EM_386}) - .Case("elf_x86_64", {ELF64LEKind, EM_X86_64}) + .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) .Default({ELFNoneKind, EM_NONE}); if (Ret.first == ELFNoneKind) { if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") - error("Windows targets are not supported on the ELF frontend: " + S); + error("Windows targets are not supported on the ELF frontend: " + Emul); else - error("unknown emulation: " + S); + error("unknown emulation: " + Emul); } - return Ret; + return std::make_tuple(Ret.first, Ret.second, OSABI); } // Returns slices of MB by parsing MB as an archive file. 
@@ -87,25 +101,28 @@ static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) { std::vector<MemoryBufferRef> LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { std::unique_ptr<Archive> File = - check(Archive::create(MB), "failed to parse archive"); + check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); std::vector<MemoryBufferRef> V; - Error Err; + Error Err = Error::success(); for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { - Archive::Child C = check(COrErr, "could not get the child of the archive " + - File->getFileName()); + Archive::Child C = + check(COrErr, MB.getBufferIdentifier() + + ": could not get the child of the archive"); MemoryBufferRef MBRef = check(C.getMemoryBufferRef(), - "could not get the buffer for a child of the archive " + - File->getFileName()); + MB.getBufferIdentifier() + + ": could not get the buffer for a child of the archive"); V.push_back(MBRef); } if (Err) - Error(Err); + fatal(MB.getBufferIdentifier() + ": Archive::children failed: " + + toString(std::move(Err))); // Take ownership of memory buffers created for members of thin archives. for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers()) - OwningMBs.push_back(std::move(MB)); + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); return V; } @@ -114,25 +131,28 @@ LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { // Newly created memory buffers are owned by this driver. 
void LinkerDriver::addFile(StringRef Path) { using namespace sys::fs; - if (Config->Verbose) - outs() << Path << "\n"; Optional<MemoryBufferRef> Buffer = readFile(Path); if (!Buffer.hasValue()) return; MemoryBufferRef MBRef = *Buffer; + if (InBinary) { + Files.push_back(make<BinaryFile>(MBRef)); + return; + } + switch (identify_magic(MBRef.getBuffer())) { case file_magic::unknown: readLinkerScript(MBRef); return; case file_magic::archive: - if (WholeArchive) { + if (InWholeArchive) { for (MemoryBufferRef MB : getArchiveMembers(MBRef)) Files.push_back(createObjectFile(MB, Path)); return; } - Files.push_back(make_unique<ArchiveFile>(MBRef)); + Files.push_back(make<ArchiveFile>(MBRef)); return; case file_magic::elf_shared_object: if (Config->Relocatable) { @@ -143,13 +163,16 @@ void LinkerDriver::addFile(StringRef Path) { return; default: if (InLib) - Files.push_back(make_unique<LazyObjectFile>(MBRef)); + Files.push_back(make<LazyObjectFile>(MBRef)); else Files.push_back(createObjectFile(MBRef)); } } Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { + if (Config->Verbose) + outs() << Path << "\n"; + auto MBOrErr = MemoryBuffer::getFile(Path); if (auto EC = MBOrErr.getError()) { error(EC, "cannot open " + Path); @@ -157,7 +180,7 @@ Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { } std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; MemoryBufferRef MBRef = MB->getMemBufferRef(); - OwningMBs.push_back(std::move(MB)); // take MB ownership + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership if (Cpio) Cpio->append(relativeToRoot(Path), MBRef.getBuffer()); @@ -167,11 +190,10 @@ Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) { // Add a given library by searching it from input search paths. 
void LinkerDriver::addLibrary(StringRef Name) { - std::string Path = searchLibrary(Name); - if (Path.empty()) - error("unable to find library -l" + Name); + if (Optional<std::string> Path = searchLibrary(Name)) + addFile(*Path); else - addFile(Path); + error("unable to find library -l" + Name); } // This function is called on startup. We need this for LTO since @@ -184,12 +206,6 @@ static void initLLVM(opt::InputArgList &Args) { InitializeAllAsmPrinters(); InitializeAllAsmParsers(); - // This is a flag to discard all but GlobalValue names. - // We want to enable it by default because it saves memory. - // Disable it only when a developer option (-save-temps) is given. - Driver->Context.setDiscardValueNames(!Config->SaveTemps); - Driver->Context.enableDebugTypeODRUniquing(); - // Parse and evaluate -mllvm options. std::vector<const char *> V; V.push_back("lld (LLVM option parsing)"); @@ -206,9 +222,6 @@ static void checkOptions(opt::InputArgList &Args) { if (Config->EMachine == EM_MIPS && Config->GnuHash) error("the .gnu.hash section is not compatible with the MIPS target."); - if (Config->EMachine == EM_AMDGPU && !Config->Entry.empty()) - error("-e option is not valid for AMDGPU."); - if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); @@ -224,8 +237,8 @@ static void checkOptions(opt::InputArgList &Args) { } } -static StringRef -getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") { +static StringRef getString(opt::InputArgList &Args, unsigned Key, + StringRef Default = "") { if (auto *Arg = Args.getLastArg(Key)) return Arg->getValue(); return Default; @@ -254,33 +267,64 @@ static bool hasZOption(opt::InputArgList &Args, StringRef Key) { return false; } -void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { +static uint64_t getZOptionValue(opt::InputArgList &Args, StringRef Key, + uint64_t Default) { + for (auto *Arg : Args.filtered(OPT_z)) { + StringRef Value = Arg->getValue(); + size_t Pos = 
Value.find("="); + if (Pos != StringRef::npos && Key == Value.substr(0, Pos)) { + Value = Value.substr(Pos + 1); + uint64_t Result; + if (Value.getAsInteger(0, Result)) + error("invalid " + Key + ": " + Value); + return Result; + } + } + return Default; +} + +void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) { ELFOptTable Parser; opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); + + // Interpret this flag early because error() depends on them. + Config->ErrorLimit = getInteger(Args, OPT_error_limit, 20); + + // Handle -help if (Args.hasArg(OPT_help)) { printHelp(ArgsArr[0]); return; } - if (Args.hasArg(OPT_version)) { - outs() << getVersionString(); + + // GNU linkers disagree here. Though both -version and -v are mentioned + // in help to print the version information, GNU ld just normally exits, + // while gold can continue linking. We are compatible with ld.bfd here. + if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) + outs() << getLLDVersion() << "\n"; + if (Args.hasArg(OPT_version)) return; - } + + Config->ExitEarly = CanExitEarly && !Args.hasArg(OPT_full_shutdown); if (const char *Path = getReproduceOption(Args)) { // Note that --reproduce is a debug option so you can ignore it // if you are trying to understand the whole picture of the code. 
- Cpio.reset(CpioFile::create(Path)); - if (Cpio) { + ErrorOr<CpioFile *> F = CpioFile::create(Path); + if (F) { + Cpio.reset(*F); Cpio->append("response.txt", createResponseFile(Args)); - Cpio->append("version.txt", getVersionString()); - } + Cpio->append("version.txt", getLLDVersion() + "\n"); + } else + error(F.getError(), + Twine("--reproduce: failed to open ") + Path + ".cpio"); } readConfigs(Args); initLLVM(Args); createFiles(Args); + inferMachineType(); checkOptions(Args); - if (HasError) + if (ErrorCount) return; switch (Config->EKind) { @@ -297,7 +341,7 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) { link<ELF64BE>(Args); return; default: - error("-m or at least a .o file required"); + llvm_unreachable("unknown Config->EKind"); } } @@ -314,10 +358,115 @@ static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) { if (S == "ignore-all" || S == "ignore-in-object-files") return UnresolvedPolicy::Ignore; if (S == "ignore-in-shared-libs" || S == "report-all") - return UnresolvedPolicy::Error; + return UnresolvedPolicy::ReportError; error("unknown --unresolved-symbols value: " + S); } - return UnresolvedPolicy::Error; + return UnresolvedPolicy::ReportError; +} + +static Target2Policy getTarget2Option(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_target2)) { + StringRef S = Arg->getValue(); + if (S == "rel") + return Target2Policy::Rel; + if (S == "abs") + return Target2Policy::Abs; + if (S == "got-rel") + return Target2Policy::GotRel; + error("unknown --target2 option: " + S); + } + return Target2Policy::GotRel; +} + +static bool isOutputFormatBinary(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_oformat)) { + StringRef S = Arg->getValue(); + if (S == "binary") + return true; + error("unknown --oformat value: " + S); + } + return false; +} + +static bool getArg(opt::InputArgList &Args, unsigned K1, unsigned K2, + bool Default) { + if (auto *Arg = Args.getLastArg(K1, K2)) + return 
Arg->getOption().getID() == K1; + return Default; +} + +static DiscardPolicy getDiscardOption(opt::InputArgList &Args) { + if (Config->Relocatable) + return DiscardPolicy::None; + auto *Arg = + Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none); + if (!Arg) + return DiscardPolicy::Default; + if (Arg->getOption().getID() == OPT_discard_all) + return DiscardPolicy::All; + if (Arg->getOption().getID() == OPT_discard_locals) + return DiscardPolicy::Locals; + return DiscardPolicy::None; +} + +static StripPolicy getStripOption(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug)) { + if (Arg->getOption().getID() == OPT_strip_all) + return StripPolicy::All; + return StripPolicy::Debug; + } + return StripPolicy::None; +} + +static uint64_t parseSectionAddress(StringRef S, opt::Arg *Arg) { + uint64_t VA = 0; + if (S.startswith("0x")) + S = S.drop_front(2); + if (S.getAsInteger(16, VA)) + error("invalid argument: " + stringize(Arg)); + return VA; +} + +static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) { + StringMap<uint64_t> Ret; + for (auto *Arg : Args.filtered(OPT_section_start)) { + StringRef Name; + StringRef Addr; + std::tie(Name, Addr) = StringRef(Arg->getValue()).split('='); + Ret[Name] = parseSectionAddress(Addr, Arg); + } + + if (auto *Arg = Args.getLastArg(OPT_Ttext)) + Ret[".text"] = parseSectionAddress(Arg->getValue(), Arg); + if (auto *Arg = Args.getLastArg(OPT_Tdata)) + Ret[".data"] = parseSectionAddress(Arg->getValue(), Arg); + if (auto *Arg = Args.getLastArg(OPT_Tbss)) + Ret[".bss"] = parseSectionAddress(Arg->getValue(), Arg); + return Ret; +} + +static SortSectionPolicy getSortKind(opt::InputArgList &Args) { + StringRef S = getString(Args, OPT_sort_section); + if (S == "alignment") + return SortSectionPolicy::Alignment; + if (S == "name") + return SortSectionPolicy::Name; + if (!S.empty()) + error("unknown --sort-section rule: " + S); + return SortSectionPolicy::Default; +} + 
+static std::vector<StringRef> getLines(MemoryBufferRef MB) { + SmallVector<StringRef, 0> Arr; + MB.getBuffer().split(Arr, '\n'); + + std::vector<StringRef> Ret; + for (StringRef S : Arr) { + S = S.trim(); + if (!S.empty()) + Ret.push_back(S); + } + return Ret; } // Initializes Config members by the command line options. @@ -334,34 +483,37 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { if (auto *Arg = Args.getLastArg(OPT_m)) { // Parse ELF{32,64}{LE,BE} and CPU type. StringRef S = Arg->getValue(); - std::tie(Config->EKind, Config->EMachine) = parseEmulation(S); + std::tie(Config->EKind, Config->EMachine, Config->OSABI) = + parseEmulation(S); + Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32"); Config->Emulation = S; } Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition); Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); - Config->Demangle = !Args.hasArg(OPT_no_demangle); + Config->Demangle = getArg(Args, OPT_demangle, OPT_no_demangle, true); Config->DisableVerify = Args.hasArg(OPT_disable_verify); - Config->DiscardAll = Args.hasArg(OPT_discard_all); - Config->DiscardLocals = Args.hasArg(OPT_discard_locals); - Config->DiscardNone = Args.hasArg(OPT_discard_none); Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr); Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings); - Config->GcSections = Args.hasArg(OPT_gc_sections); + Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false); + Config->GdbIndex = Args.hasArg(OPT_gdb_index); Config->ICF = Args.hasArg(OPT_icf); Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique); Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version); - Config->Pie = Args.hasArg(OPT_pie); + Config->Nostdlib = Args.hasArg(OPT_nostdlib); + Config->OMagic = 
Args.hasArg(OPT_omagic); + Config->Pie = getArg(Args, OPT_pie, OPT_nopie, false); Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); Config->Relocatable = Args.hasArg(OPT_relocatable); + Config->Discard = getDiscardOption(Args); Config->SaveTemps = Args.hasArg(OPT_save_temps); + Config->SingleRoRx = Args.hasArg(OPT_no_rosegment); Config->Shared = Args.hasArg(OPT_shared); - Config->StripAll = Args.hasArg(OPT_strip_all); - Config->StripDebug = Args.hasArg(OPT_strip_debug); - Config->Threads = Args.hasArg(OPT_threads); + Config->Target1Rel = getArg(Args, OPT_target1_rel, OPT_target1_abs, false); + Config->Threads = getArg(Args, OPT_threads, OPT_no_threads, true); Config->Trace = Args.hasArg(OPT_trace); Config->Verbose = Args.hasArg(OPT_verbose); Config->WarnCommon = Args.hasArg(OPT_warn_common); @@ -370,33 +522,47 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->Entry = getString(Args, OPT_entry); Config->Fini = getString(Args, OPT_fini, "_fini"); Config->Init = getString(Args, OPT_init, "_init"); - Config->LtoAAPipeline = getString(Args, OPT_lto_aa_pipeline); - Config->LtoNewPmPasses = getString(Args, OPT_lto_newpm_passes); + Config->LTOAAPipeline = getString(Args, OPT_lto_aa_pipeline); + Config->LTONewPmPasses = getString(Args, OPT_lto_newpm_passes); Config->OutputFile = getString(Args, OPT_o); Config->SoName = getString(Args, OPT_soname); Config->Sysroot = getString(Args, OPT_sysroot); Config->Optimize = getInteger(Args, OPT_O, 1); - Config->LtoO = getInteger(Args, OPT_lto_O, 2); - if (Config->LtoO > 3) + Config->LTOO = getInteger(Args, OPT_lto_O, 2); + if (Config->LTOO > 3) error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O)); - Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1); - if (Config->LtoJobs == 0) - error("number of threads must be > 0"); + Config->LTOPartitions = getInteger(Args, OPT_lto_partitions, 1); + if (Config->LTOPartitions == 0) + error("--lto-partitions: number of threads must be > 0"); + 
Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u); + if (Config->ThinLTOJobs == 0) + error("--thinlto-jobs: number of threads must be > 0"); Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); - Config->ZExecStack = hasZOption(Args, "execstack"); + Config->ZExecstack = hasZOption(Args, "execstack"); Config->ZNodelete = hasZOption(Args, "nodelete"); Config->ZNow = hasZOption(Args, "now"); Config->ZOrigin = hasZOption(Args, "origin"); Config->ZRelro = !hasZOption(Args, "norelro"); + Config->ZStackSize = getZOptionValue(Args, "stack-size", -1); + Config->ZWxneeded = hasZOption(Args, "wxneeded"); - if (Config->Relocatable) - Config->StripAll = false; + Config->OFormatBinary = isOutputFormatBinary(Args); + Config->SectionStartMap = getSectionStartMap(Args); + Config->SortSection = getSortKind(Args); + Config->Target2 = getTarget2Option(Args); + Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args); + + // --omagic is an option to create old-fashioned executables in which + // .text segments are writable. Today, the option is still in use to + // create special-purpose programs such as boot loaders. It doesn't + // make sense to create PT_GNU_RELRO for such executables. + if (Config->OMagic) + Config->ZRelro = false; - // --strip-all implies --strip-debug. - if (Config->StripAll) - Config->StripDebug = true; + if (!Config->Relocatable) + Config->Strip = getStripOption(Args); // Config->Pic is true if we are generating position-independent code. Config->Pic = Config->Pie || Config->Shared; @@ -414,13 +580,15 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { // Parse --build-id or --build-id=<style>. 
if (Args.hasArg(OPT_build_id)) - Config->BuildId = BuildIdKind::Fnv1; + Config->BuildId = BuildIdKind::Fast; if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) { StringRef S = Arg->getValue(); if (S == "md5") { Config->BuildId = BuildIdKind::Md5; - } else if (S == "sha1") { + } else if (S == "sha1" || S == "tree") { Config->BuildId = BuildIdKind::Sha1; + } else if (S == "uuid") { + Config->BuildId = BuildIdKind::Uuid; } else if (S == "none") { Config->BuildId = BuildIdKind::None; } else if (S.startswith("0x")) { @@ -431,21 +599,58 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { } } + for (auto *Arg : Args.filtered(OPT_auxiliary)) + Config->AuxiliaryList.push_back(Arg->getValue()); + if (!Config->Shared && !Config->AuxiliaryList.empty()) + error("-f may not be used without -shared"); + for (auto *Arg : Args.filtered(OPT_undefined)) Config->Undefined.push_back(Arg->getValue()); - Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args); - if (auto *Arg = Args.getLastArg(OPT_dynamic_list)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) - parseDynamicList(*Buffer); + readDynamicList(*Buffer); + + if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file)) + if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) + Config->SymbolOrderingFile = getLines(*Buffer); + + // If --retain-symbol-file is used, we'll retail only the symbols listed in + // the file and discard all others. 
+ if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) { + Config->Discard = DiscardPolicy::RetainFile; + if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) + for (StringRef S : getLines(*Buffer)) + Config->RetainSymbolsFile.insert(S); + } for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol)) - Config->DynamicList.push_back(Arg->getValue()); + Config->VersionScriptGlobals.push_back( + {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false}); + + // Dynamic lists are a simplified linker script that doesn't need the + // "global:" and implicitly ends with a "local:*". Set the variables needed to + // simulate that. + if (Args.hasArg(OPT_dynamic_list) || Args.hasArg(OPT_export_dynamic_symbol)) { + Config->ExportDynamic = true; + if (!Config->Shared) + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + } if (auto *Arg = Args.getLastArg(OPT_version_script)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) - parseVersionScript(*Buffer); + readVersionScript(*Buffer); +} + +// Returns a value of "-format" option. 
+static bool getBinaryOption(StringRef S) { + if (S == "binary") + return true; + if (S == "elf" || S == "default") + return false; + error("unknown -format value: " + S + + " (supported formats: elf, default, binary)"); + return false; } void LinkerDriver::createFiles(opt::InputArgList &Args) { @@ -454,14 +659,20 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { case OPT_l: addLibrary(Arg->getValue()); break; - case OPT_alias_script_T: case OPT_INPUT: - case OPT_script: addFile(Arg->getValue()); break; + case OPT_alias_script_T: + case OPT_script: + if (Optional<MemoryBufferRef> MB = readFile(Arg->getValue())) + readLinkerScript(*MB); + break; case OPT_as_needed: Config->AsNeeded = true; break; + case OPT_format: + InBinary = getBinaryOption(Arg->getValue()); + break; case OPT_no_as_needed: Config->AsNeeded = false; break; @@ -472,10 +683,10 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { Config->Static = false; break; case OPT_whole_archive: - WholeArchive = true; + InWholeArchive = true; break; case OPT_no_whole_archive: - WholeArchive = false; + InWholeArchive = false; break; case OPT_start_lib: InLib = true; @@ -486,19 +697,55 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { } } - if (Files.empty() && !HasError) - error("no input files."); + if (Files.empty() && ErrorCount == 0) + error("no input files"); +} - // If -m <machine_type> was not given, infer it from object files. - if (Config->EKind == ELFNoneKind) { - for (std::unique_ptr<InputFile> &F : Files) { - if (F->EKind == ELFNoneKind) - continue; - Config->EKind = F->EKind; - Config->EMachine = F->EMachine; - break; - } +// If -m <machine_type> was not given, infer it from object files. 
+void LinkerDriver::inferMachineType() { + if (Config->EKind != ELFNoneKind) + return; + + for (InputFile *F : Files) { + if (F->EKind == ELFNoneKind) + continue; + Config->EKind = F->EKind; + Config->EMachine = F->EMachine; + Config->OSABI = F->OSABI; + Config->MipsN32Abi = Config->EMachine == EM_MIPS && isMipsN32Abi(F); + return; } + error("target emulation unknown: -m or at least one .o file required"); +} + +// Parse -z max-page-size=<value>. The default value is defined by +// each target. +static uint64_t getMaxPageSize(opt::InputArgList &Args) { + uint64_t Val = + getZOptionValue(Args, "max-page-size", Target->DefaultMaxPageSize); + if (!isPowerOf2_64(Val)) + error("max-page-size: value isn't a power of 2"); + return Val; +} + +// Parses -image-base option. +static uint64_t getImageBase(opt::InputArgList &Args) { + // Use default if no -image-base option is given. + // Because we are using "Target" here, this function + // has to be called after the variable is initialized. + auto *Arg = Args.getLastArg(OPT_image_base); + if (!Arg) + return Config->Pic ? 0 : Target->DefaultImageBase; + + StringRef S = Arg->getValue(); + uint64_t V; + if (S.getAsInteger(0, V)) { + error("-image-base: number expected, but got " + S); + return 0; + } + if ((V % Config->MaxPageSize) != 0) + warn("-image-base: address isn't multiple of page size: " + S); + return V; } // Do actual linking. 
Note that when this function is called, @@ -506,66 +753,70 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { SymbolTable<ELFT> Symtab; elf::Symtab<ELFT>::X = &Symtab; + Target = createTarget(); + ScriptBase = Script<ELFT>::X = make<LinkerScript<ELFT>>(); - std::unique_ptr<TargetInfo> TI(createTarget()); - Target = TI.get(); - LinkerScript<ELFT> LS; - Script<ELFT>::X = &LS; - - Config->Rela = ELFT::Is64Bits || Config->EMachine == EM_X86_64; + Config->Rela = + ELFT::Is64Bits || Config->EMachine == EM_X86_64 || Config->MipsN32Abi; Config->Mips64EL = (Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind); - - // Add entry symbol. Note that AMDGPU binaries have no entry points. - if (Config->Entry.empty() && !Config->Shared && !Config->Relocatable && - Config->EMachine != EM_AMDGPU) - Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; + Config->MaxPageSize = getMaxPageSize(Args); + Config->ImageBase = getImageBase(Args); // Default output filename is "a.out" by the Unix tradition. if (Config->OutputFile.empty()) Config->OutputFile = "a.out"; + // Use default entry point name if no name was given via the command + // line nor linker scripts. For some reason, MIPS entry point name is + // different from others. + Config->WarnMissingEntry = + (!Config->Entry.empty() || (!Config->Shared && !Config->Relocatable)); + if (Config->Entry.empty() && !Config->Relocatable) + Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; + // Handle --trace-symbol. for (auto *Arg : Args.filtered(OPT_trace_symbol)) Symtab.trace(Arg->getValue()); - // Set either EntryAddr (if S is a number) or EntrySym (otherwise). - if (!Config->Entry.empty()) { - StringRef S = Config->Entry; - if (S.getAsInteger(0, Config->EntryAddr)) - Config->EntrySym = Symtab.addUndefined(S); - } + // Add all files to the symbol table. 
This will add almost all + // symbols that we need to the symbol table. + for (InputFile *F : Files) + Symtab.addFile(F); - // Initialize Config->ImageBase. - if (auto *Arg = Args.getLastArg(OPT_image_base)) { - StringRef S = Arg->getValue(); - if (S.getAsInteger(0, Config->ImageBase)) - error(Arg->getSpelling() + ": number expected, but got " + S); - else if ((Config->ImageBase % Target->PageSize) != 0) - warning(Arg->getSpelling() + ": address isn't multiple of page size"); - } else { - Config->ImageBase = Config->Pic ? 0 : Target->DefaultImageBase; - } + // If an entry symbol is in a static archive, pull out that file now + // to complete the symbol table. After this, no new names except a + // few linker-synthesized ones will be added to the symbol table. + if (Symtab.find(Config->Entry)) + Symtab.addUndefined(Config->Entry); - for (std::unique_ptr<InputFile> &F : Files) - Symtab.addFile(std::move(F)); - if (HasError) - return; // There were duplicate symbols or incompatible files + // Return if there were name resolution errors. + if (ErrorCount) + return; Symtab.scanUndefinedFlags(); Symtab.scanShlibUndefined(); - Symtab.scanDynamicList(); Symtab.scanVersionScript(); - Symtab.scanSymbolVersions(); - Symtab.addCombinedLtoObject(); - if (HasError) + Symtab.addCombinedLTOObject(); + if (ErrorCount) return; for (auto *Arg : Args.filtered(OPT_wrap)) Symtab.wrap(Arg->getValue()); - // Write the result to the file. + // Now that we have a complete list of input files. + // Beyond this point, no new files are added. + // Aggregate all input sections into one place. 
+ for (elf::ObjectFile<ELFT> *F : Symtab.getObjectFiles()) + for (InputSectionBase<ELFT> *S : F->getSections()) + if (S && S != &InputSection<ELFT>::Discarded) + Symtab.Sections.push_back(S); + for (BinaryFile *F : Symtab.getBinaryFiles()) + for (InputSectionData *S : F->getSections()) + Symtab.Sections.push_back(cast<InputSection<ELFT>>(S)); + + // Do size optimizations: garbage collection and identical code folding. if (Config->GcSections) markLive<ELFT>(); if (Config->ICF) @@ -573,16 +824,16 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { // MergeInputSection::splitIntoPieces needs to be called before // any call of MergeInputSection::getOffset. Do that. - for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F : - Symtab.getObjectFiles()) - for (InputSectionBase<ELFT> *S : F->getSections()) { - if (!S || S == &InputSection<ELFT>::Discarded || !S->Live) - continue; - if (S->Compressed) - S->uncompress(); - if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) - MS->splitIntoPieces(); - } + forEach(Symtab.Sections.begin(), Symtab.Sections.end(), + [](InputSectionBase<ELFT> *S) { + if (!S->Live) + return; + if (S->isCompressed()) + S->uncompress(); + if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) + MS->splitIntoPieces(); + }); - writeResult<ELFT>(&Symtab); + // Write the result to the file. 
+ writeResult<ELFT>(); } diff --git a/contrib/llvm/tools/lld/ELF/Driver.h b/contrib/llvm/tools/lld/ELF/Driver.h index dc20961a3069..eeca30c42fba 100644 --- a/contrib/llvm/tools/lld/ELF/Driver.h +++ b/contrib/llvm/tools/lld/ELF/Driver.h @@ -12,6 +12,7 @@ #include "SymbolTable.h" #include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -23,14 +24,11 @@ namespace elf { extern class LinkerDriver *Driver; -class CpioFile; - class LinkerDriver { public: - void main(ArrayRef<const char *> Args); + void main(ArrayRef<const char *> Args, bool CanExitEarly); void addFile(StringRef Path); void addLibrary(StringRef Name); - llvm::LLVMContext Context; // to parse bitcode ifles std::unique_ptr<CpioFile> Cpio; // for reproduce private: @@ -38,17 +36,19 @@ private: llvm::Optional<MemoryBufferRef> readFile(StringRef Path); void readConfigs(llvm::opt::InputArgList &Args); void createFiles(llvm::opt::InputArgList &Args); + void inferMachineType(); template <class ELFT> void link(llvm::opt::InputArgList &Args); // True if we are in --whole-archive and --no-whole-archive. - bool WholeArchive = false; + bool InWholeArchive = false; // True if we are in --start-lib and --end-lib. bool InLib = false; - llvm::BumpPtrAllocator Alloc; - std::vector<std::unique_ptr<InputFile>> Files; - std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; + // True if we are in -format=binary and -format=elf. + bool InBinary = false; + + std::vector<InputFile *> Files; }; // Parses command line options. @@ -56,9 +56,6 @@ class ELFOptTable : public llvm::opt::OptTable { public: ELFOptTable(); llvm::opt::InputArgList parse(ArrayRef<const char *> Argv); - -private: - llvm::BumpPtrAllocator Alloc; }; // Create enum with OPT_xxx values for each option in Options.td @@ -69,41 +66,13 @@ enum { #undef OPTION }; -// This is the class to create a .cpio file for --reproduce. 
-// -// If "--reproduce foo" is given, we create a file "foo.cpio" and -// copy all input files to the archive, along with a response file -// to re-run the same command with the same inputs. -// It is useful for reporting issues to LLD developers. -// -// Cpio as a file format is a deliberate choice. It's standardized in -// POSIX and very easy to create. cpio command is available virtually -// on all Unix systems. See -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07 -// for the format details. -class CpioFile { -public: - static CpioFile *create(StringRef OutputPath); - void append(StringRef Path, StringRef Data); - -private: - CpioFile(std::unique_ptr<llvm::raw_fd_ostream> OS, StringRef Basename); - - std::unique_ptr<llvm::raw_fd_ostream> OS; - llvm::StringSet<> Seen; - std::string Basename; -}; - void printHelp(const char *Argv0); -std::string getVersionString(); std::vector<uint8_t> parseHexstring(StringRef S); std::string createResponseFile(const llvm::opt::InputArgList &Args); -std::string relativeToRoot(StringRef Path); -std::string findFromSearchPaths(StringRef Path); -std::string searchLibrary(StringRef Path); -std::string buildSysrootedPath(llvm::StringRef Dir, llvm::StringRef File); +llvm::Optional<std::string> findFromSearchPaths(StringRef Path); +llvm::Optional<std::string> searchLibrary(StringRef Path); } // namespace elf } // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/DriverUtils.cpp b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp index 274c08bcdb0f..0534070d2529 100644 --- a/contrib/llvm/tools/lld/ELF/DriverUtils.cpp +++ b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp @@ -15,14 +15,18 @@ #include "Driver.h" #include "Error.h" +#include "Memory.h" +#include "ScriptParser.h" #include "lld/Config/Version.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include 
"llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include "llvm/Support/StringSaver.h" +#include "llvm/Support/Process.h" using namespace llvm; using namespace llvm::sys; @@ -40,16 +44,37 @@ using namespace lld::elf; // Create table mapping all options defined in Options.td static const opt::OptTable::Info OptInfo[] = { #define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ - { \ - X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, X8, X7, OPT_##GROUP, \ - OPT_##ALIAS, X6 \ - }, + {X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, \ + X8, X7, OPT_##GROUP, OPT_##ALIAS, X6}, #include "ELF/Options.inc" #undef OPTION }; ELFOptTable::ELFOptTable() : OptTable(OptInfo) {} +// Parse -color-diagnostics={auto,always,never} or -no-color-diagnostics. +static bool getColorDiagnostics(opt::InputArgList &Args) { + bool Default = (ErrorOS == &errs() && Process::StandardErrHasColors()); + + auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!Arg) + return Default; + if (Arg->getOption().getID() == OPT_color_diagnostics) + return true; + if (Arg->getOption().getID() == OPT_no_color_diagnostics) + return false; + + StringRef S = Arg->getValue(); + if (S == "auto") + return Default; + if (S == "always") + return true; + if (S != "never") + error("unknown option: -color-diagnostics=" + S); + return false; +} + static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) { StringRef S = Arg->getValue(); @@ -76,16 +101,16 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) { // --rsp-quoting. opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); - // Expand response files. '@<filename>' is replaced by the file's contents. - StringSaver Saver(Alloc); + // Expand response files (arguments in the form of @<filename>) + // and then parse the argument again. 
cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec); - - // Parse options and then do error checking. Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + // Interpret -color-diagnostics early so that error messages + // for unknown flags are colored. + Config->ColorDiagnostics = getColorDiagnostics(Args); if (MissingCount) - error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) + - "\", expected " + Twine(MissingCount) + - (MissingCount == 1 ? " argument.\n" : " arguments")); + error(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); for (auto *Arg : Args.filtered(OPT_UNKNOWN)) error("unknown argument: " + Arg->getSpelling()); @@ -97,115 +122,6 @@ void elf::printHelp(const char *Argv0) { Table.PrintHelp(outs(), Argv0, "lld", false); } -std::string elf::getVersionString() { - std::string Version = getLLDVersion(); - std::string Repo = getLLDRepositoryVersion(); - if (Repo.empty()) - return "LLD " + Version + "\n"; - return "LLD " + Version + " " + Repo + "\n"; -} - -// Makes a given pathname an absolute path first, and then remove -// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", -// assuming that the current directory is "/home/john/bar". -std::string elf::relativeToRoot(StringRef Path) { - SmallString<128> Abs = Path; - if (std::error_code EC = fs::make_absolute(Abs)) - fatal("make_absolute failed: " + EC.message()); - path::remove_dots(Abs, /*remove_dot_dot=*/true); - - // This is Windows specific. root_name() returns a drive letter - // (e.g. "c:") or a UNC name (//net). We want to keep it as part - // of the result. 
- SmallString<128> Res; - StringRef Root = path::root_name(Abs); - if (Root.endswith(":")) - Res = Root.drop_back(); - else if (Root.startswith("//")) - Res = Root.substr(2); - - path::append(Res, path::relative_path(Abs)); - return Res.str(); -} - -CpioFile::CpioFile(std::unique_ptr<raw_fd_ostream> OS, StringRef S) - : OS(std::move(OS)), Basename(S) {} - -CpioFile *CpioFile::create(StringRef OutputPath) { - std::string Path = (OutputPath + ".cpio").str(); - std::error_code EC; - auto OS = llvm::make_unique<raw_fd_ostream>(Path, EC, fs::F_None); - if (EC) { - error(EC, "--reproduce: failed to open " + Path); - return nullptr; - } - return new CpioFile(std::move(OS), path::filename(OutputPath)); -} - -static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) { - // The c_dev/c_ino pair should be unique according to the spec, - // but no one seems to care. - OS << "070707"; // c_magic - OS << "000000"; // c_dev - OS << "000000"; // c_ino - OS << "100664"; // c_mode: C_ISREG | rw-rw-r-- - OS << "000000"; // c_uid - OS << "000000"; // c_gid - OS << "000001"; // c_nlink - OS << "000000"; // c_rdev - OS << "00000000000"; // c_mtime - OS << format("%06o", Path.size() + 1); // c_namesize - OS << format("%011o", Data.size()); // c_filesize - OS << Path << '\0'; // c_name - OS << Data; // c_filedata -} - -void CpioFile::append(StringRef Path, StringRef Data) { - if (!Seen.insert(Path).second) - return; - - // Construct an in-archive filename so that /home/foo/bar is stored - // as baz/home/foo/bar where baz is the basename of the output file. - // (i.e. in that case we are creating baz.cpio.) - SmallString<128> Fullpath; - path::append(Fullpath, Basename, Path); - - // Use unix path separators so the cpio can be extracted on both unix and - // windows. - std::replace(Fullpath.begin(), Fullpath.end(), '\\', '/'); - - writeMember(*OS, Fullpath, Data); - - // Print the trailer and seek back. - // This way we have a valid archive if we crash. 
- uint64_t Pos = OS->tell(); - writeMember(*OS, "TRAILER!!!", ""); - OS->seek(Pos); -} - -// Quote a given string if it contains a space character. -static std::string quote(StringRef S) { - if (S.find(' ') == StringRef::npos) - return S; - return ("\"" + S + "\"").str(); -} - -static std::string rewritePath(StringRef S) { - if (fs::exists(S)) - return relativeToRoot(S); - return S; -} - -static std::string stringize(opt::Arg *Arg) { - std::string K = Arg->getSpelling(); - if (Arg->getNumValues() == 0) - return K; - std::string V = quote(Arg->getValue()); - if (Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle) - return K + V; - return K + " " + V; -} - // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. std::string elf::createResponseFile(const opt::InputArgList &Args) { @@ -226,8 +142,8 @@ std::string elf::createResponseFile(const opt::InputArgList &Args) { case OPT_alias_script_T: case OPT_script: case OPT_version_script: - OS << Arg->getSpelling() << " " - << quote(rewritePath(Arg->getValue())) << "\n"; + OS << Arg->getSpelling() << " " << quote(rewritePath(Arg->getValue())) + << "\n"; break; default: OS << stringize(Arg) << "\n"; @@ -236,41 +152,39 @@ std::string elf::createResponseFile(const opt::InputArgList &Args) { return Data.str(); } -std::string elf::findFromSearchPaths(StringRef Path) { - for (StringRef Dir : Config->SearchPaths) { - std::string FullPath = buildSysrootedPath(Dir, Path); - if (fs::exists(FullPath)) - return FullPath; - } - return ""; +// Find a file by concatenating given paths. If a resulting path +// starts with "=", the character is replaced with a --sysroot value. 
+static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) { + SmallString<128> S; + if (Path1.startswith("=")) + path::append(S, Config->Sysroot, Path1.substr(1), Path2); + else + path::append(S, Path1, Path2); + + if (fs::exists(S)) + return S.str().str(); + return None; +} + +Optional<std::string> elf::findFromSearchPaths(StringRef Path) { + for (StringRef Dir : Config->SearchPaths) + if (Optional<std::string> S = findFile(Dir, Path)) + return S; + return None; } -// Searches a given library from input search paths, which are filled -// from -L command line switches. Returns a path to an existent library file. -std::string elf::searchLibrary(StringRef Path) { - if (Path.startswith(":")) - return findFromSearchPaths(Path.substr(1)); +// This is for -lfoo. We'll look for libfoo.so or libfoo.a from +// search paths. +Optional<std::string> elf::searchLibrary(StringRef Name) { + if (Name.startswith(":")) + return findFromSearchPaths(Name.substr(1)); + for (StringRef Dir : Config->SearchPaths) { - if (!Config->Static) { - std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".so").str()); - if (fs::exists(S)) + if (!Config->Static) + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".so")) return S; - } - std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".a").str()); - if (fs::exists(S)) + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".a")) return S; } - return ""; -} - -// Makes a path by concatenating Dir and File. -// If Dir starts with '=' the result will be preceded by Sysroot, -// which can be set with --sysroot command line switch. 
-std::string elf::buildSysrootedPath(StringRef Dir, StringRef File) { - SmallString<128> Path; - if (Dir.startswith("=")) - path::append(Path, Config->Sysroot, Dir.substr(1), File); - else - path::append(Path, Dir, File); - return Path.str(); + return None; } diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.cpp b/contrib/llvm/tools/lld/ELF/EhFrame.cpp index b130ac1ca22d..2428473d9012 100644 --- a/contrib/llvm/tools/lld/ELF/EhFrame.cpp +++ b/contrib/llvm/tools/lld/ELF/EhFrame.cpp @@ -18,6 +18,9 @@ #include "EhFrame.h" #include "Error.h" +#include "InputSection.h" +#include "Relocations.h" +#include "Strings.h" #include "llvm/Object/ELF.h" #include "llvm/Support/Dwarf.h" @@ -29,49 +32,93 @@ using namespace llvm::dwarf; using namespace llvm::object; using namespace llvm::support::endian; -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; +namespace { +template <class ELFT> class EhReader { +public: + EhReader(InputSectionBase<ELFT> *S, ArrayRef<uint8_t> D) : IS(S), D(D) {} + size_t readEhRecordSize(); + uint8_t getFdeEncoding(); + +private: + template <class P> void failOn(const P *Loc, const Twine &Msg) { + fatal(IS->getLocation((const uint8_t *)Loc - IS->Data.data()) + ": " + Msg); + } + + uint8_t readByte(); + void skipBytes(size_t Count); + StringRef readString(); + void skipLeb128(); + void skipAugP(); + + InputSectionBase<ELFT> *IS; + ArrayRef<uint8_t> D; +}; +} + +template <class ELFT> +size_t elf::readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off) { + return EhReader<ELFT>(S, S->Data.slice(Off)).readEhRecordSize(); +} // .eh_frame section is a sequence of records. Each record starts with // a 4 byte length field. This function reads the length. 
-template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> D) { +template <class ELFT> size_t EhReader<ELFT>::readEhRecordSize() { const endianness E = ELFT::TargetEndianness; if (D.size() < 4) - fatal("CIE/FDE too small"); + failOn(D.data(), "CIE/FDE too small"); // First 4 bytes of CIE/FDE is the size of the record. // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead, // but we do not support that format yet. uint64_t V = read32<E>(D.data()); if (V == UINT32_MAX) - fatal("CIE/FDE too large"); + failOn(D.data(), "CIE/FDE too large"); uint64_t Size = V + 4; if (Size > D.size()) - fatal("CIE/FIE ends past the end of the section"); + failOn(D.data(), "CIE/FDE ends past the end of the section"); return Size; } // Read a byte and advance D by one byte. -static uint8_t readByte(ArrayRef<uint8_t> &D) { +template <class ELFT> uint8_t EhReader<ELFT>::readByte() { if (D.empty()) - fatal("corrupted or unsupported CIE information"); + failOn(D.data(), "unexpected end of CIE"); uint8_t B = D.front(); D = D.slice(1); return B; } +template <class ELFT> void EhReader<ELFT>::skipBytes(size_t Count) { + if (D.size() < Count) + failOn(D.data(), "CIE is too small"); + D = D.slice(Count); +} + +// Read a null-terminated string. +template <class ELFT> StringRef EhReader<ELFT>::readString() { + const uint8_t *End = std::find(D.begin(), D.end(), '\0'); + if (End == D.end()) + failOn(D.data(), "corrupted CIE (failed to read string)"); + StringRef S = toStringRef(D.slice(0, End - D.begin())); + D = D.slice(S.size() + 1); + return S; +} + // Skip an integer encoded in the LEB128 format. // Actual number is not of interest because only the runtime needs it. // But we need to be at least able to skip it so that we can read // the field that follows a LEB128 number. 
-static void skipLeb128(ArrayRef<uint8_t> &D) { +template <class ELFT> void EhReader<ELFT>::skipLeb128() { + const uint8_t *ErrPos = D.data(); while (!D.empty()) { uint8_t Val = D.front(); D = D.slice(1); if ((Val & 0x80) == 0) return; } - fatal("corrupted or unsupported CIE information"); + failOn(ErrPos, "corrupted CIE (failed to read LEB128)"); } template <class ELFT> static size_t getAugPSize(unsigned Enc) { @@ -89,79 +136,79 @@ template <class ELFT> static size_t getAugPSize(unsigned Enc) { case DW_EH_PE_sdata8: return 8; } - fatal("unknown FDE encoding"); + return 0; } -template <class ELFT> static void skipAugP(ArrayRef<uint8_t> &D) { - uint8_t Enc = readByte(D); +template <class ELFT> void EhReader<ELFT>::skipAugP() { + uint8_t Enc = readByte(); if ((Enc & 0xf0) == DW_EH_PE_aligned) - fatal("DW_EH_PE_aligned encoding is not supported"); + failOn(D.data() - 1, "DW_EH_PE_aligned encoding is not supported"); size_t Size = getAugPSize<ELFT>(Enc); + if (Size == 0) + failOn(D.data() - 1, "unknown FDE encoding"); if (Size >= D.size()) - fatal("corrupted CIE"); + failOn(D.data() - 1, "corrupted CIE"); D = D.slice(Size); } -template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> D) { - if (D.size() < 8) - fatal("CIE too small"); - D = D.slice(8); +template <class ELFT> uint8_t elf::getFdeEncoding(EhSectionPiece *P) { + auto *IS = static_cast<InputSectionBase<ELFT> *>(P->ID); + return EhReader<ELFT>(IS, P->data()).getFdeEncoding(); +} - uint8_t Version = readByte(D); +template <class ELFT> uint8_t EhReader<ELFT>::getFdeEncoding() { + skipBytes(8); + int Version = readByte(); if (Version != 1 && Version != 3) - fatal("FDE version 1 or 3 expected, but got " + Twine((unsigned)Version)); + failOn(D.data() - 1, + "FDE version 1 or 3 expected, but got " + Twine(Version)); - const unsigned char *AugEnd = std::find(D.begin(), D.end(), '\0'); - if (AugEnd == D.end()) - fatal("corrupted CIE"); - StringRef Aug(reinterpret_cast<const char *>(D.begin()), AugEnd - 
D.begin()); - D = D.slice(Aug.size() + 1); + StringRef Aug = readString(); - // Code alignment factor should always be 1 for .eh_frame. - if (readByte(D) != 1) - fatal("CIE code alignment must be 1"); - - // Skip data alignment factor. - skipLeb128(D); + // Skip code and data alignment factors. + skipLeb128(); + skipLeb128(); // Skip the return address register. In CIE version 1 this is a single // byte. In CIE version 3 this is an unsigned LEB128. if (Version == 1) - readByte(D); + readByte(); else - skipLeb128(D); + skipLeb128(); // We only care about an 'R' value, but other records may precede an 'R' // record. Unfortunately records are not in TLV (type-length-value) format, // so we need to teach the linker how to skip records for each type. for (char C : Aug) { if (C == 'R') - return readByte(D); + return readByte(); if (C == 'z') { - skipLeb128(D); + skipLeb128(); continue; } if (C == 'P') { - skipAugP<ELFT>(D); + skipAugP(); continue; } if (C == 'L') { - readByte(D); + readByte(); continue; } - fatal("unknown .eh_frame augmentation string: " + Aug); + failOn(Aug.data(), "unknown .eh_frame augmentation string: " + Aug); } return DW_EH_PE_absptr; } -template size_t readEhRecordSize<ELF32LE>(ArrayRef<uint8_t>); -template size_t readEhRecordSize<ELF32BE>(ArrayRef<uint8_t>); -template size_t readEhRecordSize<ELF64LE>(ArrayRef<uint8_t>); -template size_t readEhRecordSize<ELF64BE>(ArrayRef<uint8_t>); - -template uint8_t getFdeEncoding<ELF32LE>(ArrayRef<uint8_t>); -template uint8_t getFdeEncoding<ELF32BE>(ArrayRef<uint8_t>); -template uint8_t getFdeEncoding<ELF64LE>(ArrayRef<uint8_t>); -template uint8_t getFdeEncoding<ELF64BE>(ArrayRef<uint8_t>); -} -} +template size_t elf::readEhRecordSize<ELF32LE>(InputSectionBase<ELF32LE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF32BE>(InputSectionBase<ELF32BE> *S, + size_t Off); +template size_t elf::readEhRecordSize<ELF64LE>(InputSectionBase<ELF64LE> *S, + size_t Off); +template size_t 
elf::readEhRecordSize<ELF64BE>(InputSectionBase<ELF64BE> *S, + size_t Off); + +template uint8_t elf::getFdeEncoding<ELF32LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF32BE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64LE>(EhSectionPiece *P); +template uint8_t elf::getFdeEncoding<ELF64BE>(EhSectionPiece *P); diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.h b/contrib/llvm/tools/lld/ELF/EhFrame.h index 0d5a2ff2f417..cadc93d3a2e4 100644 --- a/contrib/llvm/tools/lld/ELF/EhFrame.h +++ b/contrib/llvm/tools/lld/ELF/EhFrame.h @@ -14,8 +14,12 @@ namespace lld { namespace elf { -template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> Data); -template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> Data); +template <class ELFT> class InputSectionBase; +struct EhSectionPiece; + +template <class ELFT> +size_t readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off); +template <class ELFT> uint8_t getFdeEncoding(EhSectionPiece *P); } } diff --git a/contrib/llvm/tools/lld/ELF/Error.cpp b/contrib/llvm/tools/lld/ELF/Error.cpp index 59a49c17b97c..6e30f08143ed 100644 --- a/contrib/llvm/tools/lld/ELF/Error.cpp +++ b/contrib/llvm/tools/lld/ELF/Error.cpp @@ -12,54 +12,95 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include <mutex> +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#endif + +using namespace lld::elf; using namespace llvm; namespace lld { -namespace elf { -bool HasError; -raw_ostream *ErrorOS; +uint64_t elf::ErrorCount; +raw_ostream *elf::ErrorOS; +StringRef elf::Argv0; + +// The functions defined in this file can be called from multiple threads, +// but outs() or errs() are not thread-safe. We protect them using a mutex. 
+static std::mutex Mu; + +static void print(StringRef S, raw_ostream::Colors C) { + *ErrorOS << Argv0 + ": "; + if (Config->ColorDiagnostics) { + ErrorOS->changeColor(C, true); + *ErrorOS << S; + ErrorOS->resetColor(); + } else { + *ErrorOS << S; + } +} -void log(const Twine &Msg) { +void elf::log(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); if (Config->Verbose) - outs() << Msg << "\n"; + outs() << Argv0 << ": " << Msg << "\n"; } -void warning(const Twine &Msg) { - if (Config->FatalWarnings) +void elf::warn(const Twine &Msg) { + if (Config->FatalWarnings) { error(Msg); - else - *ErrorOS << Msg << "\n"; + return; + } + std::lock_guard<std::mutex> Lock(Mu); + print("warning: ", raw_ostream::MAGENTA); + *ErrorOS << Msg << "\n"; } -void error(const Twine &Msg) { - *ErrorOS << Msg << "\n"; - HasError = true; +void elf::error(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + + if (Config->ErrorLimit == 0 || ErrorCount < Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + } else if (ErrorCount == Config->ErrorLimit) { + print("error: ", raw_ostream::RED); + *ErrorOS << "too many errors emitted, stopping now" + << " (use -error-limit=0 to see all errors)\n"; + if (Config->ExitEarly) + exitLld(1); + } + + ++ErrorCount; } -void error(std::error_code EC, const Twine &Prefix) { +void elf::error(std::error_code EC, const Twine &Prefix) { error(Prefix + ": " + EC.message()); } -void fatal(const Twine &Msg) { - *ErrorOS << Msg << "\n"; - exit(1); -} +void elf::exitLld(int Val) { + // Dealloc/destroy ManagedStatic variables before calling + // _exit(). In a non-LTO build, this is a nop. In an LTO + // build allows us to get the output of -time-passes. 
+ llvm_shutdown(); -void fatal(const Twine &Msg, const Twine &Prefix) { - fatal(Prefix + ": " + Msg); + outs().flush(); + errs().flush(); + _exit(Val); } -void check(std::error_code EC) { - if (EC) - fatal(EC.message()); +void elf::fatal(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + print("error: ", raw_ostream::RED); + *ErrorOS << Msg << "\n"; + exitLld(1); } -void check(Error Err) { - check(errorToErrorCode(std::move(Err))); +void elf::fatal(std::error_code EC, const Twine &Prefix) { + fatal(Prefix + ": " + EC.message()); } -} // namespace elf } // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Error.h b/contrib/llvm/tools/lld/ELF/Error.h index 552f50498464..1ec683595cf4 100644 --- a/contrib/llvm/tools/lld/ELF/Error.h +++ b/contrib/llvm/tools/lld/ELF/Error.h @@ -6,31 +6,47 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// In LLD, we have three levels of errors: fatal, error or warn. +// +// Fatal makes the program exit immediately with an error message. +// You shouldn't use it except for reporting a corrupted input file. +// +// Error prints out an error message and increment a global variable +// ErrorCount to record the fact that we met an error condition. It does +// not exit, so it is safe for a lld-as-a-library use case. It is generally +// useful because it can report more than one errors in a single run. +// +// Warn doesn't do anything but printing out a given message. 
+// +//===----------------------------------------------------------------------===// -#ifndef LLD_COFF_ERROR_H -#define LLD_COFF_ERROR_H +#ifndef LLD_ELF_ERROR_H +#define LLD_ELF_ERROR_H #include "lld/Core/LLVM.h" +#include "llvm/Support/Error.h" + namespace lld { namespace elf { -extern bool HasError; +extern uint64_t ErrorCount; extern llvm::raw_ostream *ErrorOS; +extern llvm::StringRef Argv0; void log(const Twine &Msg); -void warning(const Twine &Msg); +void warn(const Twine &Msg); void error(const Twine &Msg); void error(std::error_code EC, const Twine &Prefix); -template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) { - error(V.getError(), Prefix); -} - +LLVM_ATTRIBUTE_NORETURN void exitLld(int Val); LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); +// check() functions are convenient functions to strip errors +// from error-or-value objects. 
template <class T> T check(ErrorOr<T> E) { if (auto EC = E.getError()) fatal(EC.message()); @@ -39,19 +55,23 @@ template <class T> T check(ErrorOr<T> E) { template <class T> T check(Expected<T> E) { if (!E) - fatal(errorToErrorCode(E.takeError()).message()); + handleAllErrors(std::move(E.takeError()), + [](llvm::ErrorInfoBase &EIB) -> Error { + fatal(EIB.message()); + return Error::success(); + }); return std::move(*E); } template <class T> T check(ErrorOr<T> E, const Twine &Prefix) { if (auto EC = E.getError()) - fatal(EC.message(), Prefix); + fatal(Prefix + ": " + EC.message()); return std::move(*E); } template <class T> T check(Expected<T> E, const Twine &Prefix) { if (!E) - fatal(errorToErrorCode(E.takeError()).message(), Prefix); + fatal(Prefix + ": " + errorToErrorCode(E.takeError()).message()); return std::move(*E); } diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.cpp b/contrib/llvm/tools/lld/ELF/GdbIndex.cpp new file mode 100644 index 000000000000..762144dd0a96 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.cpp @@ -0,0 +1,205 @@ +//===- GdbIndex.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// File contains classes for implementation of --gdb-index command line option. +// +// If that option is used, linker should emit a .gdb_index section that allows +// debugger to locate and read .dwo files, containing necessary debug +// information. +// More information about implementation can be found in DWARF specification, +// latest version is available at http://dwarfstd.org. 
+// +// .gdb_index section format: +// (Information is based on/taken from +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html (*)) +// +// A mapped index consists of several areas, laid out in order: +// 1) The file header. +// 2) "The CU (compilation unit) list. This is a sequence of pairs of 64-bit +// little-endian values, sorted by the CU offset. The first element in each +// pair is the offset of a CU in the .debug_info section. The second element +// in each pair is the length of that CU. References to a CU elsewhere in the +// map are done using a CU index, which is just the 0-based index into this +// table. Note that if there are type CUs, then conceptually CUs and type CUs +// form a single list for the purposes of CU indices."(*) +// 3) The types CU list. Deprecated as .debug_types does not appear in the DWARF +// v5 specification. +// 4) The address area. The address area is a sequence of address +// entries, where each entry contains low address, high address and CU +// index. +// 5) "The symbol table. This is an open-addressed hash table. The size of the +// hash table is always a power of 2. Each slot in the hash table consists of +// a pair of offset_type values. The first value is the offset of the +// symbol's name in the constant pool. The second value is the offset of the +// CU vector in the constant pool."(*) +// 6) "The constant pool. This is simply a bunch of bytes. It is organized so +// that alignment is correct: CU vectors are stored first, followed by +// strings." (*) +// +// For constructing the .gdb_index section the following steps should be performed: +// 1) For file header nothing special should be done. It contains the offsets to +// the areas below. +// 2) Scan the compilation unit headers of the .debug_info sections to build a +// list of compilation units. +// 3) CU Types are no longer needed as DWARF skeleton type units never made it +// into the standard. 
lld does nothing to support parsing of .debug_types +// and generates empty types CU area in .gdb_index section. +// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of +// .debug_info sections. +// 5) For building the symbol table linker extracts the public names from the +// .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the +// hashtable according to the .gdb_index format specification. +// 6) Constant pool is populated at the same time as symbol table. +//===----------------------------------------------------------------------===// + +#include "GdbIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/Object/ELFObjectFile.h" + +using namespace llvm; +using namespace llvm::object; +using namespace lld::elf; + +template <class ELFT> +GdbIndexBuilder<ELFT>::GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec) + : DebugInfoSec(DebugInfoSec) { + if (Expected<std::unique_ptr<object::ObjectFile>> Obj = + object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) + Dwarf.reset(new DWARFContextInMemory(*Obj.get(), this)); + else + error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); +} + +template <class ELFT> +std::vector<std::pair<typename ELFT::uint, typename ELFT::uint>> +GdbIndexBuilder<ELFT>::readCUList() { + std::vector<std::pair<uintX_t, uintX_t>> Ret; + for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) + Ret.push_back( + {DebugInfoSec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); + return Ret; +} + +template <class ELFT> +std::vector<std::pair<StringRef, uint8_t>> +GdbIndexBuilder<ELFT>::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector<std::pair<StringRef, uint8_t>> Ret; + for (StringRef D : Data) { + DWARFDebugPubTable PubTable(D, IsLE, true); + for (const DWARFDebugPubTable::Set &S : 
PubTable.getData()) + for (const DWARFDebugPubTable::Entry &E : S.Entries) + Ret.push_back({E.Name, E.Descriptor.toBits()}); + } + return Ret; +} + +std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) { + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if (Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector<GdbSymbol *> NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. The step size used to find +// the next candidate slot when handling a hash collision is specified in +// .gdb_index section format. The hash value for a table entry is computed by +// applying an iterative hash function to the symbol's name. 
+GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +template <class ELFT> +static InputSectionBase<ELFT> * +findSection(ArrayRef<InputSectionBase<ELFT> *> Arr, uint64_t Offset) { + for (InputSectionBase<ELFT> *S : Arr) + if (S && S != &InputSection<ELFT>::Discarded) + if (Offset >= S->Offset && Offset < S->Offset + S->getSize()) + return S; + return nullptr; +} + +template <class ELFT> +std::vector<AddressEntry<ELFT>> +GdbIndexBuilder<ELFT>::readAddressArea(size_t CurrentCU) { + std::vector<AddressEntry<ELFT>> Ret; + for (const auto &CU : Dwarf->compile_units()) { + DWARFAddressRangesVector Ranges; + CU->collectAddressRanges(Ranges); + + ArrayRef<InputSectionBase<ELFT> *> Sections = + DebugInfoSec->getFile()->getSections(); + + for (std::pair<uint64_t, uint64_t> &R : Ranges) + if (InputSectionBase<ELFT> *S = findSection(Sections, R.first)) + Ret.push_back( + {S, R.first - S->Offset, R.second - S->Offset, CurrentCU}); + ++CurrentCU; + } + return Ret; +} + +// We return file offset as load address for allocatable sections. That is +// currently used for collecting address ranges in readAddressArea(). We are +// able then to find section index that range belongs to. 
+template <class ELFT> +uint64_t GdbIndexBuilder<ELFT>::getSectionLoadAddress( + const object::SectionRef &Sec) const { + if (static_cast<const ELFSectionRef &>(Sec).getFlags() & ELF::SHF_ALLOC) + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + return 0; +} + +template <class ELFT> +std::unique_ptr<LoadedObjectInfo> GdbIndexBuilder<ELFT>::clone() const { + return {}; +} + +namespace lld { +namespace elf { +template class GdbIndexBuilder<ELF32LE>; +template class GdbIndexBuilder<ELF32BE>; +template class GdbIndexBuilder<ELF64LE>; +template class GdbIndexBuilder<ELF64BE>; +} +} diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.h b/contrib/llvm/tools/lld/ELF/GdbIndex.h new file mode 100644 index 000000000000..c761ea173a8d --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.h @@ -0,0 +1,99 @@ +//===- GdbIndex.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// + +#ifndef LLD_ELF_GDB_INDEX_H +#define LLD_ELF_GDB_INDEX_H + +#include "InputFiles.h" +#include "llvm/Object/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> class InputSection; + +// Struct represents single entry of address area of gdb index. +template <class ELFT> struct AddressEntry { + InputSectionBase<ELFT> *Section; + uint64_t LowAddress; + uint64_t HighAddress; + size_t CuIndex; +}; + +// GdbIndexBuilder is a helper class used for extracting data required +// for building .gdb_index section from objects. 
+template <class ELFT> class GdbIndexBuilder : public llvm::LoadedObjectInfo { + typedef typename ELFT::uint uintX_t; + + InputSection<ELFT> *DebugInfoSec; + + std::unique_ptr<llvm::DWARFContext> Dwarf; + +public: + GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec); + + // Extracts the compilation units. Each first element of pair is an offset of a + // CU in the .debug_info section and second is the length of that CU. + std::vector<std::pair<uintX_t, uintX_t>> readCUList(); + + // Extracts the vector of address area entries. Accepts global index of last + // parsed CU. + std::vector<AddressEntry<ELFT>> readAddressArea(size_t CurrentCU); + + // Method extracts public names and types. It returns list of name and + // gnu_pub* kind pairs. + std::vector<std::pair<StringRef, uint8_t>> readPubNamesAndTypes(); + +private: + // Method returns section file offset as a load address for DWARF parser. That + // allows to find the target section index for address ranges. + uint64_t + getSectionLoadAddress(const llvm::object::SectionRef &Sec) const override; + std::unique_ptr<llvm::LoadedObjectInfo> clone() const override; +}; + +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector<GdbSymbol *> Table; + + // Size keeps the amount of filled entries in Table. 
+ size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/ICF.cpp b/contrib/llvm/tools/lld/ELF/ICF.cpp index 10a2603b3b3e..32cd0f8a185c 100644 --- a/contrib/llvm/tools/lld/ELF/ICF.cpp +++ b/contrib/llvm/tools/lld/ELF/ICF.cpp @@ -7,63 +7,82 @@ // //===----------------------------------------------------------------------===// // -// Identical Code Folding is a feature to merge sections not by name (which -// is regular comdat handling) but by contents. If two non-writable sections -// have the same data, relocations, attributes, etc., then the two -// are considered identical and merged by the linker. This optimization -// makes outputs smaller. +// ICF is short for Identical Code Folding. This is a size optimization to +// identify and merge two or more read-only sections (typically functions) +// that happened to have the same contents. It usually reduces output size +// by a few percent. // -// ICF is theoretically a problem of reducing graphs by merging as many -// identical subgraphs as possible if we consider sections as vertices and -// relocations as edges. It may sound simple, but it is a bit more -// complicated than you might think. The order of processing sections -// matters because merging two sections can make other sections, whose -// relocations now point to the same section, mergeable. Graphs may contain -// cycles. We need a sophisticated algorithm to do this properly and -// efficiently. +// In ICF, two sections are considered identical if they have the same +// section flags, section data, and relocations. Relocations are tricky, +// because two relocations are considered the same if they have the same +// relocation types, values, and if they point to the same sections *in +// terms of ICF*. // -// What we do in this file is this. We split sections into groups. 
Sections -// in the same group are considered identical. +// Here is an example. If foo and bar defined below are compiled to the +// same machine instructions, ICF can and should merge the two, although +// their relocations point to each other. // -// We begin by optimistically putting all sections into a single equivalence -// class. Then we apply a series of checks that split this initial -// equivalence class into more and more refined equivalence classes based on -// the properties by which a section can be distinguished. +// void foo() { bar(); } +// void bar() { foo(); } // -// We begin by checking that the section contents and flags are the -// same. This only needs to be done once since these properties don't depend -// on the current equivalence class assignment. +// If you merge the two, their relocations point to the same section and +// thus you know they are mergeable, but how do you know they are +// mergeable in the first place? This is not an easy problem to solve. // -// Then we split the equivalence classes based on checking that their -// relocations are the same, where relocation targets are compared by their -// equivalence class, not the concrete section. This may need to be done -// multiple times because as the equivalence classes are refined, two -// sections that had a relocation target in the same equivalence class may -// now target different equivalence classes, and hence these two sections -// must be put in different equivalence classes (whereas in the previous -// iteration they were not since the relocation target was the same.) +// What we are doing in LLD is to partition sections into equivalence +// classes. Sections in the same equivalence class when the algorithm +// terminates are considered identical. Here are details: // -// Our algorithm is smart enough to merge the following mutually-recursive -// functions. +// 1. First, we partition sections using their hash values as keys. 
Hash +// values contain section types, section contents and numbers of +// relocations. During this step, relocation targets are not taken into +// account. We just put sections that apparently differ into different +// equivalence classes. // -// void foo() { bar(); } -// void bar() { foo(); } +// 2. Next, for each equivalence class, we visit sections to compare +// relocation targets. Relocation targets are considered equivalent if +// their targets are in the same equivalence class. Sections with +// different relocation targets are put into different equivalence +// classes. +// +// 3. If we split an equivalence class in step 2, two relocations +// that previously targeted the same equivalence class may now target +// different equivalence classes. Therefore, we repeat step 2 until a +// convergence is obtained. +// +// 4. For each equivalence class C, pick an arbitrary section in C, and +// merge all the other sections in C with it. +// +// For small programs, this algorithm needs 3-5 iterations. For large +// programs such as Chromium, it takes more than 20 iterations. +// +// This algorithm was mentioned as an "optimistic algorithm" in [1], +// though gold implements a different algorithm than this. +// +// We parallelize each step so that multiple threads can work on different +// equivalence classes concurrently. That gave us a large performance +// boost when applying ICF on large programs. For example, MSVC link.exe +// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output +// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a +// 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still +// faster than MSVC or gold though. // -// This algorithm is so-called "optimistic" algorithm described in -// http://research.google.com/pubs/pub36912.html. (Note that what GNU -// gold implemented is different from the optimistic algorithm.) 
+// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding +// in the Gold Linker +// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf // //===----------------------------------------------------------------------===// #include "ICF.h" #include "Config.h" -#include "OutputSections.h" #include "SymbolTable.h" +#include "Threads.h" #include "llvm/ADT/Hashing.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ELF.h" -#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <atomic> using namespace lld; using namespace lld::elf; @@ -71,143 +90,132 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; -namespace lld { -namespace elf { +namespace { template <class ELFT> class ICF { - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; - typedef Elf_Rel_Impl<ELFT, false> Elf_Rel; - - using Comparator = std::function<bool(const InputSection<ELFT> *, - const InputSection<ELFT> *)>; - public: void run(); private: - uint64_t NextId = 1; - - static void setLive(SymbolTable<ELFT> *S); - static uint64_t relSize(InputSection<ELFT> *S); - static uint64_t getHash(InputSection<ELFT> *S); - static bool isEligible(InputSectionBase<ELFT> *Sec); - static std::vector<InputSection<ELFT> *> getSections(); - - void segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End, - Comparator Eq); - - void forEachGroup(std::vector<InputSection<ELFT> *> &V, Comparator Eq); + void segregate(size_t Begin, size_t End, bool Constant); template <class RelTy> - static bool relocationEq(ArrayRef<RelTy> RA, ArrayRef<RelTy> RB); + bool constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB); template <class RelTy> - static bool variableEq(const InputSection<ELFT> *A, - const InputSection<ELFT> *B, ArrayRef<RelTy> RA, - ArrayRef<RelTy> RB); - - static bool equalsConstant(const InputSection<ELFT> *A, - const InputSection<ELFT> *B); - - static 
bool equalsVariable(const InputSection<ELFT> *A, - const InputSection<ELFT> *B); + bool variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB); + + bool equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + bool equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + + size_t findBoundary(size_t Begin, size_t End); + + void forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn); + + void forEachClass(std::function<void(size_t, size_t)> Fn); + + std::vector<InputSection<ELFT> *> Sections; + + // We repeat the main loop while `Repeat` is true. + std::atomic<bool> Repeat; + + // The main loop counter. + int Cnt = 0; + + // We have two locations for equivalence classes. On the first iteration + // of the main loop, Class[0] has a valid value, and Class[1] contains + // garbage. We read equivalence classes from slot 0 and write to slot 1. + // So, Class[0] represents the current class, and Class[1] represents + // the next class. On each iteration, we switch their roles and use them + // alternately. + // + // Why are we doing this? Recall that other threads may be working on + // other equivalence classes in parallel. They may read sections that we + // are updating. We cannot update equivalence classes in place because + // it breaks the invariance that all possibly-identical sections must be + // in the same equivalence class at any moment. In other words, the for + // loop to update equivalence classes is not atomic, and that is + // observable from other threads. By writing new classes to other + // places, we can keep the invariance. + // + // Below, `Current` has the index of the current class, and `Next` has + // the index of the next class. If threading is enabled, they are either + // (0, 1) or (1, 0). 
+ // + // Note on single-thread: if that's the case, they are always (0, 0) + // because we can safely read the next class without worrying about race + // conditions. Using the same location makes this algorithm converge + // faster because it uses results of the same iteration earlier. + int Current = 0; + int Next = 0; }; } -} // Returns a hash value for S. Note that the information about // relocation targets is not included in the hash value. -template <class ELFT> uint64_t ICF<ELFT>::getHash(InputSection<ELFT> *S) { - uint64_t Flags = S->getSectionHdr()->sh_flags; - uint64_t H = hash_combine(Flags, S->getSize()); - for (const Elf_Shdr *Rel : S->RelocSections) - H = hash_combine(H, (uint64_t)Rel->sh_size); - return H; +template <class ELFT> static uint32_t getHash(InputSection<ELFT> *S) { + return hash_combine(S->Flags, S->getSize(), S->NumRelocations); } -// Returns true if Sec is subject of ICF. -template <class ELFT> bool ICF<ELFT>::isEligible(InputSectionBase<ELFT> *Sec) { - if (!Sec || Sec == &InputSection<ELFT>::Discarded || !Sec->Live) - return false; - auto *S = dyn_cast<InputSection<ELFT>>(Sec); - if (!S) - return false; - +// Returns true if section S is subject of ICF. +template <class ELFT> static bool isEligible(InputSection<ELFT> *S) { // .init and .fini contains instructions that must be executed to // initialize and finalize the process. They cannot and should not // be merged. 
- StringRef Name = S->getSectionName(); - if (Name == ".init" || Name == ".fini") - return false; - - const Elf_Shdr &H = *S->getSectionHdr(); - return (H.sh_flags & SHF_ALLOC) && (~H.sh_flags & SHF_WRITE); -} - -template <class ELFT> -std::vector<InputSection<ELFT> *> ICF<ELFT>::getSections() { - std::vector<InputSection<ELFT> *> V; - for (const std::unique_ptr<ObjectFile<ELFT>> &F : - Symtab<ELFT>::X->getObjectFiles()) - for (InputSectionBase<ELFT> *S : F->getSections()) - if (isEligible(S)) - V.push_back(cast<InputSection<ELFT>>(S)); - return V; + return S->Live && (S->Flags & SHF_ALLOC) && !(S->Flags & SHF_WRITE) && + S->Name != ".init" && S->Name != ".fini"; } -// All sections between Begin and End must have the same group ID before -// you call this function. This function compare sections between Begin -// and End using Eq and assign new group IDs for new groups. +// Split an equivalence class into smaller classes. template <class ELFT> -void ICF<ELFT>::segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End, - Comparator Eq) { - // This loop rearranges [Begin, End) so that all sections that are - // equal in terms of Eq are contiguous. The algorithm is quadratic in - // the worst case, but that is not an issue in practice because the - // number of distinct sections in [Begin, End) is usually very small. - InputSection<ELFT> **I = Begin; - for (;;) { - InputSection<ELFT> *Head = *I; +void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { + // This loop rearranges sections in [Begin, End) so that all sections + // that are equal in terms of equals{Constant,Variable} are contiguous + // in [Begin, End). + // + // The algorithm is quadratic in the worst case, but that is not an + // issue in practice because the number of the distinct sections in + // each range is usually very small. + + while (Begin < End) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. 
auto Bound = std::stable_partition( - I + 1, End, [&](InputSection<ELFT> *S) { return Eq(Head, S); }); - if (Bound == End) - return; - uint64_t Id = NextId++; - for (; I != Bound; ++I) - (*I)->GroupId = Id; - } -} - -template <class ELFT> -void ICF<ELFT>::forEachGroup(std::vector<InputSection<ELFT> *> &V, - Comparator Eq) { - for (InputSection<ELFT> **I = V.data(), **E = I + V.size(); I != E;) { - InputSection<ELFT> *Head = *I; - auto Bound = std::find_if(I + 1, E, [&](InputSection<ELFT> *S) { - return S->GroupId != Head->GroupId; - }); - segregate(I, Bound, Eq); - I = Bound; + Sections.begin() + Begin + 1, Sections.begin() + End, + [&](InputSection<ELFT> *S) { + if (Constant) + return equalsConstant(Sections[Begin], S); + return equalsVariable(Sections[Begin], S); + }); + size_t Mid = Bound - Sections.begin(); + + // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by + // updating the sections in [Begin, End). We use Mid as an equivalence + // class ID because every group ends with a unique index. + for (size_t I = Begin; I < Mid; ++I) + Sections[I]->Class[Next] = Mid; + + // If we created a group, we need to iterate the main loop again. + if (Mid != End) + Repeat = true; + + Begin = Mid; } } // Compare two lists of relocations. 
template <class ELFT> template <class RelTy> -bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { - const RelTy *IA = RelsA.begin(); - const RelTy *EA = RelsA.end(); - const RelTy *IB = RelsB.begin(); - const RelTy *EB = RelsB.end(); - if (EA - IA != EB - IB) - return false; - for (; IA != EA; ++IA, ++IB) - if (IA->r_offset != IB->r_offset || - IA->getType(Config->Mips64EL) != IB->getType(Config->Mips64EL) || - getAddend<ELFT>(*IA) != getAddend<ELFT>(*IB)) - return false; - return true; +bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { + auto Eq = [](const RelTy &A, const RelTy &B) { + return A.r_offset == B.r_offset && + A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) && + getAddend<ELFT>(A) == getAddend<ELFT>(B); + }; + + return RelsA.size() == RelsB.size() && + std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); } // Compare "non-moving" part of two InputSections, namely everything @@ -215,125 +223,155 @@ bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { template <class ELFT> bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B) { - if (A->RelocSections.size() != B->RelocSections.size()) + if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || + A->getSize() != B->getSize() || A->Data != B->Data) return false; - for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { - const Elf_Shdr *RA = A->RelocSections[I]; - const Elf_Shdr *RB = B->RelocSections[I]; - ELFFile<ELFT> &FileA = A->File->getObj(); - ELFFile<ELFT> &FileB = B->File->getObj(); - if (RA->sh_type == SHT_RELA) { - if (!relocationEq(FileA.relas(RA), FileB.relas(RB))) - return false; - } else { - if (!relocationEq(FileA.rels(RA), FileB.rels(RB))) - return false; - } - } - - return A->getSectionHdr()->sh_flags == B->getSectionHdr()->sh_flags && - A->getSize() == B->getSize() && - A->getSectionData() == B->getSectionData(); + if (A->AreRelocsRela) + return 
constantEq(A->relas(), B->relas()); + return constantEq(A->rels(), B->rels()); } +// Compare two lists of relocations. Returns true if all pairs of +// relocations point to the same section in terms of ICF. template <class ELFT> template <class RelTy> -bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, - const InputSection<ELFT> *B, ArrayRef<RelTy> RelsA, - ArrayRef<RelTy> RelsB) { - const RelTy *IA = RelsA.begin(); - const RelTy *EA = RelsA.end(); - const RelTy *IB = RelsB.begin(); - for (; IA != EA; ++IA, ++IB) { - SymbolBody &SA = A->File->getRelocTargetSym(*IA); - SymbolBody &SB = B->File->getRelocTargetSym(*IB); +bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, + const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB) { + auto Eq = [&](const RelTy &RA, const RelTy &RB) { + // The two sections must be identical. + SymbolBody &SA = A->getFile()->getRelocTargetSym(RA); + SymbolBody &SB = B->getFile()->getRelocTargetSym(RB); if (&SA == &SB) - continue; + return true; - // Or, the symbols should be pointing to the same section - // in terms of the group ID. + // Or, the two sections must be in the same equivalence class. auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); if (!DA || !DB) return false; if (DA->Value != DB->Value) return false; - InputSection<ELFT> *X = dyn_cast<InputSection<ELFT>>(DA->Section); - InputSection<ELFT> *Y = dyn_cast<InputSection<ELFT>>(DB->Section); - if (X && Y && X->GroupId && X->GroupId == Y->GroupId) - continue; - return false; - } - return true; + + auto *X = dyn_cast<InputSection<ELFT>>(DA->Section); + auto *Y = dyn_cast<InputSection<ELFT>>(DB->Section); + if (!X || !Y) + return false; + + // Ineligible sections are in the special equivalence class 0. + // They can never be the same in terms of the equivalence class. 
+ if (X->Class[Current] == 0) + return false; + + return X->Class[Current] == Y->Class[Current]; + }; + + return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); } // Compare "moving" part of two InputSections, namely relocation targets. template <class ELFT> bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B) { - for (size_t I = 0, E = A->RelocSections.size(); I != E; ++I) { - const Elf_Shdr *RA = A->RelocSections[I]; - const Elf_Shdr *RB = B->RelocSections[I]; - ELFFile<ELFT> &FileA = A->File->getObj(); - ELFFile<ELFT> &FileB = B->File->getObj(); - if (RA->sh_type == SHT_RELA) { - if (!variableEq(A, B, FileA.relas(RA), FileB.relas(RB))) - return false; - } else { - if (!variableEq(A, B, FileA.rels(RA), FileB.rels(RB))) - return false; - } + if (A->AreRelocsRela) + return variableEq(A, A->relas(), B, B->relas()); + return variableEq(A, A->rels(), B, B->rels()); +} + +template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { + uint32_t Class = Sections[Begin]->Class[Current]; + for (size_t I = Begin + 1; I < End; ++I) + if (Class != Sections[I]->Class[Current]) + return I; + return End; +} + +// Sections in the same equivalence class are contiguous in Sections +// vector. Therefore, Sections vector can be considered as contiguous +// groups of sections, grouped by the class. +// +// This function calls Fn on every group that starts within [Begin, End). +// Note that a group must starts in that range but doesn't necessarily +// have to end before End. +template <class ELFT> +void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, + std::function<void(size_t, size_t)> Fn) { + if (Begin > 0) + Begin = findBoundary(Begin - 1, End); + + while (Begin < End) { + size_t Mid = findBoundary(Begin, Sections.size()); + Fn(Begin, Mid); + Begin = Mid; } - return true; +} + +// Call Fn on each equivalence class. 
+template <class ELFT> +void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { + // If threading is disabled or the number of sections are + // too small to use threading, call Fn sequentially. + if (!Config->Threads || Sections.size() < 1024) { + forEachClassRange(0, Sections.size(), Fn); + ++Cnt; + return; + } + + Current = Cnt % 2; + Next = (Cnt + 1) % 2; + + // Split sections into 256 shards and call Fn in parallel. + size_t NumShards = 256; + size_t Step = Sections.size() / NumShards; + forLoop(0, NumShards, + [&](size_t I) { forEachClassRange(I * Step, (I + 1) * Step, Fn); }); + forEachClassRange(Step * NumShards, Sections.size(), Fn); + ++Cnt; } // The main function of ICF. template <class ELFT> void ICF<ELFT>::run() { - // Initially, we use hash values as section group IDs. Therefore, - // if two sections have the same ID, they are likely (but not - // guaranteed) to have the same static contents in terms of ICF. - std::vector<InputSection<ELFT> *> V = getSections(); - for (InputSection<ELFT> *S : V) - // Set MSB on to avoid collisions with serial group IDs - S->GroupId = getHash(S) | (uint64_t(1) << 63); - - // From now on, sections in V are ordered so that sections in - // the same group are consecutive in the vector. - std::stable_sort(V.begin(), V.end(), + // Collect sections to merge. + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) + if (auto *S = dyn_cast<InputSection<ELFT>>(Sec)) + if (isEligible(S)) + Sections.push_back(S); + + // Initially, we use hash values to partition sections. + for (InputSection<ELFT> *S : Sections) + // Set MSB to 1 to avoid collisions with non-hash IDs. + S->Class[0] = getHash(S) | (1 << 31); + + // From now on, sections in Sections vector are ordered so that sections + // in the same equivalence class are consecutive in the vector. 
+ std::stable_sort(Sections.begin(), Sections.end(), [](InputSection<ELFT> *A, InputSection<ELFT> *B) { - return A->GroupId < B->GroupId; + return A->Class[0] < B->Class[0]; }); // Compare static contents and assign unique IDs for each static content. - forEachGroup(V, equalsConstant); + forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); - // Split groups by comparing relocations until we get a convergence. - int Cnt = 1; - for (;;) { - ++Cnt; - uint64_t Id = NextId; - forEachGroup(V, equalsVariable); - if (Id == NextId) - break; - } - log("ICF needed " + Twine(Cnt) + " iterations."); - - // Merge sections in the same group. - for (auto I = V.begin(), E = V.end(); I != E;) { - InputSection<ELFT> *Head = *I++; - auto Bound = std::find_if(I, E, [&](InputSection<ELFT> *S) { - return Head->GroupId != S->GroupId; - }); - if (I == Bound) - continue; - log("selected " + Head->getSectionName()); - while (I != Bound) { - InputSection<ELFT> *S = *I++; - log(" removed " + S->getSectionName()); - Head->replace(S); + // Split groups by comparing relocations until convergence is obtained. + do { + Repeat = false; + forEachClass( + [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); + } while (Repeat); + + log("ICF needed " + Twine(Cnt) + " iterations"); + + // Merge sections by the equivalence class. + forEachClass([&](size_t Begin, size_t End) { + if (End - Begin == 1) + return; + + log("selected " + Sections[Begin]->Name); + for (size_t I = Begin + 1; I < End; ++I) { + log(" removed " + Sections[I]->Name); + Sections[Begin]->replace(Sections[I]); } - } + }); } // ICF entry point function. 
diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp index 426d9c39715d..2a8659921463 100644 --- a/contrib/llvm/tools/lld/ELF/InputFiles.cpp +++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp @@ -11,13 +11,20 @@ #include "Driver.h" #include "Error.h" #include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" #include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/LTO/LTO.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -29,8 +36,68 @@ using namespace llvm::sys::fs; using namespace lld; using namespace lld::elf; +namespace { +// In ELF object file all section addresses are zero. If we have multiple +// .text sections (when using -ffunction-section or comdat group) then +// LLVM DWARF parser will not be able to parse .debug_line correctly, unless +// we assign each section some unique address. This callback method assigns +// each section an address equal to its offset in ELF object file. 
+class ObjectInfo : public LoadedObjectInfo { +public: + uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { + return static_cast<const ELFSectionRef &>(Sec).getOffset(); + } + std::unique_ptr<LoadedObjectInfo> clone() const override { + return std::unique_ptr<LoadedObjectInfo>(); + } +}; +} + +template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { + std::unique_ptr<object::ObjectFile> Obj = + check(object::ObjectFile::createObjectFile(this->MB), + "createObjectFile failed"); + + ObjectInfo ObjInfo; + DWARFContextInMemory Dwarf(*Obj, &ObjInfo); + DwarfLine.reset(new DWARFDebugLine(&Dwarf.getLineSection().Relocs)); + DataExtractor LineData(Dwarf.getLineSection().Data, + ELFT::TargetEndianness == support::little, + ELFT::Is64Bits ? 8 : 4); + + // The second parameter is offset in .debug_line section + // for compilation unit (CU) of interest. We have only one + // CU (object file), so offset is always 0. + DwarfLine->getOrParseLineTable(LineData, 0); +} + +// Returns source line information for a given offset +// using DWARF debug info. +template <class ELFT> +std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase<ELFT> *S, + uintX_t Offset) { + if (!DwarfLine) + initializeDwarfLine(); + + // The offset to CU is 0. + const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); + if (!Tbl) + return ""; + + // Use fake address calcuated by adding section file offset and offset in + // section. See comments for ObjectInfo class. + DILineInfo Info; + Tbl->getFileLineInfoForAddress( + S->Offset + Offset, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); + if (Info.Line == 0) + return ""; + return convertToUnixPathSeparator(Info.FileName) + ":" + + std::to_string(Info.Line); +} + // Returns "(internal)", "foo.a(bar.o)" or "baz.o". 
-std::string elf::getFilename(const InputFile *F) { +std::string elf::toString(const InputFile *F) { if (!F) return "(internal)"; if (!F->ArchiveName.empty()) @@ -38,15 +105,6 @@ std::string elf::getFilename(const InputFile *F) { return F->getName(); } -template <class ELFT> -static ELFFile<ELFT> createELFObj(MemoryBufferRef MB) { - std::error_code EC; - ELFFile<ELFT> F(MB.getBuffer(), EC); - if (EC) - error(EC, "failed to read " + MB.getBufferIdentifier()); - return F; -} - template <class ELFT> static ELFKind getELFKind() { if (ELFT::TargetEndianness == support::little) return ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; @@ -54,41 +112,31 @@ template <class ELFT> static ELFKind getELFKind() { } template <class ELFT> -ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) - : InputFile(K, MB), ELFObj(createELFObj<ELFT>(MB)) { +ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { EKind = getELFKind<ELFT>(); - EMachine = ELFObj.getHeader()->e_machine; + EMachine = getObj().getHeader()->e_machine; + OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; } template <class ELFT> -typename ELFT::SymRange ELFFileBase<ELFT>::getElfSymbols(bool OnlyGlobals) { - if (!Symtab) - return Elf_Sym_Range(nullptr, nullptr); - Elf_Sym_Range Syms = ELFObj.symbols(Symtab); - uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); - uint32_t FirstNonLocal = Symtab->sh_info; - if (FirstNonLocal > NumSymbols) - fatal(getFilename(this) + ": invalid sh_info in symbol table"); - - if (OnlyGlobals) - return makeArrayRef(Syms.begin() + FirstNonLocal, Syms.end()); - return makeArrayRef(Syms.begin(), Syms.end()); +typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() { + return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end()); } template <class ELFT> uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { - uint32_t I = Sym.st_shndx; - if (I == ELF::SHN_XINDEX) - return ELFObj.getExtendedSymbolTableIndex(&Sym, Symtab, 
SymtabSHNDX); - if (I >= ELF::SHN_LORESERVE) - return 0; - return I; + return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX)); } -template <class ELFT> void ELFFileBase<ELFT>::initStringTable() { - if (!Symtab) - return; - StringTable = check(ELFObj.getStringTableForSymtab(*Symtab)); +template <class ELFT> +void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr *Symtab) { + FirstNonLocal = Symtab->sh_info; + Symbols = check(getObj().symbols(Symtab)); + if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) + fatal(toString(this) + ": invalid sh_info in symbol table"); + + StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections)); } template <class ELFT> @@ -97,37 +145,25 @@ elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() { - if (!this->Symtab) - return this->SymbolBodies; - uint32_t FirstNonLocal = this->Symtab->sh_info; - return makeArrayRef(this->SymbolBodies).slice(FirstNonLocal); + return makeArrayRef(this->SymbolBodies).slice(this->FirstNonLocal); } template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { - if (!this->Symtab) + if (this->SymbolBodies.empty()) return this->SymbolBodies; - uint32_t FirstNonLocal = this->Symtab->sh_info; - return makeArrayRef(this->SymbolBodies).slice(1, FirstNonLocal - 1); + return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1); } template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() { - if (!this->Symtab) + if (this->SymbolBodies.empty()) return this->SymbolBodies; return makeArrayRef(this->SymbolBodies).slice(1); } -template <class ELFT> uint32_t elf::ObjectFile<ELFT>::getMipsGp0() const { - if (ELFT::Is64Bits && MipsOptions && MipsOptions->Reginfo) - return MipsOptions->Reginfo->ri_gp_value; - if (!ELFT::Is64Bits && MipsReginfo && MipsReginfo->Reginfo) - return MipsReginfo->Reginfo->ri_gp_value; - 
return 0; -} - template <class ELFT> -void elf::ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { +void elf::ObjectFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { // Read section and symbol tables. initializeSections(ComdatGroups); initializeSymbols(); @@ -137,22 +173,25 @@ void elf::ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) { // They are identified and deduplicated by group name. This function // returns a group name. template <class ELFT> -StringRef elf::ObjectFile<ELFT>::getShtGroupSignature(const Elf_Shdr &Sec) { - const ELFFile<ELFT> &Obj = this->ELFObj; - const Elf_Shdr *Symtab = check(Obj.getSection(Sec.sh_link)); - const Elf_Sym *Sym = Obj.getSymbol(Symtab, Sec.sh_info); - StringRef Strtab = check(Obj.getStringTableForSymtab(*Symtab)); - return check(Sym->getName(Strtab)); +StringRef +elf::ObjectFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec) { + if (this->Symbols.empty()) + this->initSymtab(Sections, + check(object::getSection<ELFT>(Sections, Sec.sh_link))); + const Elf_Sym *Sym = + check(object::getSymbol<ELFT>(this->Symbols, Sec.sh_info)); + return check(Sym->getName(this->StringTable)); } template <class ELFT> ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { - const ELFFile<ELFT> &Obj = this->ELFObj; + const ELFFile<ELFT> &Obj = this->getObj(); ArrayRef<Elf_Word> Entries = check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec)); if (Entries.empty() || Entries[0] != GRP_COMDAT) - fatal(getFilename(this) + ": unsupported SHT_GROUP format"); + fatal(toString(this) + ": unsupported SHT_GROUP format"); return Entries.slice(1); } @@ -163,15 +202,39 @@ bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { if (Config->Optimize == 0) return false; + // Do not merge sections if generating a relocatable object. 
It makes + // the code simpler because we do not need to update relocation addends + // to reflect changes introduced by merging. Instead of that we write + // such "merge" sections into separate OutputSections and keep SHF_MERGE + // / SHF_STRINGS flags and sh_entsize value to be able to perform merging + // later during a final linking. + if (Config->Relocatable) + return false; + + // A mergeable section with size 0 is useless because they don't have + // any data to merge. A mergeable string section with size 0 can be + // argued as invalid because it doesn't end with a null character. + // We'll avoid a mess by handling them as if they were non-mergeable. + if (Sec.sh_size == 0) + return false; + + // Check for sh_entsize. The ELF spec is not clear about the zero + // sh_entsize. It says that "the member [sh_entsize] contains 0 if + // the section does not hold a table of fixed-size entries". We know + // that Rust 1.13 produces a string mergeable section with a zero + // sh_entsize. Here we just accept it rather than being picky about it. + uintX_t EntSize = Sec.sh_entsize; + if (EntSize == 0) + return false; + if (Sec.sh_size % EntSize) + fatal(toString(this) + + ": SHF_MERGE section size must be a multiple of sh_entsize"); + uintX_t Flags = Sec.sh_flags; if (!(Flags & SHF_MERGE)) return false; if (Flags & SHF_WRITE) - fatal(getFilename(this) + ": writable SHF_MERGE section is not supported"); - uintX_t EntSize = Sec.sh_entsize; - if (!EntSize || Sec.sh_size % EntSize) - fatal(getFilename(this) + - ": SHF_MERGE section size must be a multiple of sh_entsize"); + fatal(toString(this) + ": writable SHF_MERGE section is not supported"); // Don't try to merge if the alignment is larger than the sh_entsize and this // is not SHF_STRINGS. 
@@ -187,74 +250,61 @@ bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { template <class ELFT> void elf::ObjectFile<ELFT>::initializeSections( - DenseSet<StringRef> &ComdatGroups) { - uint64_t Size = this->ELFObj.getNumSections(); + DenseSet<CachedHashStringRef> &ComdatGroups) { + ArrayRef<Elf_Shdr> ObjSections = check(this->getObj().sections()); + const ELFFile<ELFT> &Obj = this->getObj(); + uint64_t Size = ObjSections.size(); Sections.resize(Size); unsigned I = -1; - const ELFFile<ELFT> &Obj = this->ELFObj; - for (const Elf_Shdr &Sec : Obj.sections()) { + StringRef SectionStringTable = check(Obj.getSectionStringTable(ObjSections)); + for (const Elf_Shdr &Sec : ObjSections) { ++I; if (Sections[I] == &InputSection<ELFT>::Discarded) continue; + // SHF_EXCLUDE'ed sections are discarded by the linker. However, + // if -r is given, we'll let the final link discard such sections. + // This is compatible with GNU. + if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { + Sections[I] = &InputSection<ELFT>::Discarded; + continue; + } + switch (Sec.sh_type) { case SHT_GROUP: Sections[I] = &InputSection<ELFT>::Discarded; - if (ComdatGroups.insert(getShtGroupSignature(Sec)).second) + if (ComdatGroups.insert(CachedHashStringRef( + getShtGroupSignature(ObjSections, Sec))) + .second) continue; for (uint32_t SecIndex : getShtGroupEntries(Sec)) { if (SecIndex >= Size) - fatal(getFilename(this) + ": invalid section index in group: " + + fatal(toString(this) + ": invalid section index in group: " + Twine(SecIndex)); Sections[SecIndex] = &InputSection<ELFT>::Discarded; } break; case SHT_SYMTAB: - this->Symtab = &Sec; + this->initSymtab(ObjSections, &Sec); break; case SHT_SYMTAB_SHNDX: - this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec)); + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, ObjSections)); break; case SHT_STRTAB: case SHT_NULL: break; - case SHT_RELA: - case SHT_REL: { - // This section contains relocation information. 
- // If -r is given, we do not interpret or apply relocation - // but just copy relocation sections to output. - if (Config->Relocatable) { - Sections[I] = new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec); - break; - } - - // Find the relocation target section and associate this - // section with it. - InputSectionBase<ELFT> *Target = getRelocTarget(Sec); - if (!Target) - break; - if (auto *S = dyn_cast<InputSection<ELFT>>(Target)) { - S->RelocSections.push_back(&Sec); - break; - } - if (auto *S = dyn_cast<EhInputSection<ELFT>>(Target)) { - if (S->RelocSection) - fatal( - getFilename(this) + - ": multiple relocation sections to .eh_frame are not supported"); - S->RelocSection = &Sec; - break; - } - fatal(getFilename(this) + - ": relocations pointing to SHF_MERGE are not supported"); - } - case SHT_ARM_ATTRIBUTES: - // FIXME: ARM meta-data section. At present attributes are ignored, - // they can be used to reason about object compatibility. - Sections[I] = &InputSection<ELFT>::Discarded; - break; default: - Sections[I] = createInputSection(Sec); + Sections[I] = createInputSection(Sec, SectionStringTable); + } + + // .ARM.exidx sections have a reverse dependency on the InputSection they + // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 
+ if (Sec.sh_flags & SHF_LINK_ORDER) { + if (Sec.sh_link >= Sections.size()) + fatal(toString(this) + ": invalid sh_link index: " + + Twine(Sec.sh_link)); + auto *IS = cast<InputSection<ELFT>>(Sections[Sec.sh_link]); + IS->DependentSection = Sections[I]; } } } @@ -264,8 +314,7 @@ InputSectionBase<ELFT> * elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { uint32_t Idx = Sec.sh_info; if (Idx >= Sections.size()) - fatal(getFilename(this) + ": invalid relocated section index: " + - Twine(Idx)); + fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); InputSectionBase<ELFT> *Target = Sections[Idx]; // Strictly speaking, a relocation section must be included in the @@ -275,14 +324,65 @@ elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { return nullptr; if (!Target) - fatal(getFilename(this) + ": unsupported relocation reference"); + fatal(toString(this) + ": unsupported relocation reference"); return Target; } template <class ELFT> InputSectionBase<ELFT> * -elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { - StringRef Name = check(this->ELFObj.getSectionName(&Sec)); +elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable) { + StringRef Name = + check(this->getObj().getSectionName(&Sec, SectionStringTable)); + + switch (Sec.sh_type) { + case SHT_ARM_ATTRIBUTES: + // FIXME: ARM meta-data section. Retain the first attribute section + // we see. The eglibc ARM dynamic loaders require the presence of an + // attribute section for dlopen to work. + // In a full implementation we would merge all attribute sections. + if (In<ELFT>::ARMAttributes == nullptr) { + In<ELFT>::ARMAttributes = make<InputSection<ELFT>>(this, &Sec, Name); + return In<ELFT>::ARMAttributes; + } + return &InputSection<ELFT>::Discarded; + case SHT_RELA: + case SHT_REL: { + // This section contains relocation information. 
+ // If -r is given, we do not interpret or apply relocation + // but just copy relocation sections to output. + if (Config->Relocatable) + return make<InputSection<ELFT>>(this, &Sec, Name); + + // Find the relocation target section and associate this + // section with it. + InputSectionBase<ELFT> *Target = getRelocTarget(Sec); + if (!Target) + return nullptr; + if (Target->FirstRelocation) + fatal(toString(this) + + ": multiple relocation sections to one section are not supported"); + if (!isa<InputSection<ELFT>>(Target) && !isa<EhInputSection<ELFT>>(Target)) + fatal(toString(this) + + ": relocations pointing to SHF_MERGE are not supported"); + + size_t NumRelocations; + if (Sec.sh_type == SHT_RELA) { + ArrayRef<Elf_Rela> Rels = check(this->getObj().relas(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = true; + } else { + ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec)); + Target->FirstRelocation = Rels.begin(); + NumRelocations = Rels.size(); + Target->AreRelocsRela = false; + } + assert(isUInt<31>(NumRelocations)); + Target->NumRelocations = NumRelocations; + return nullptr; + } + } // .note.GNU-stack is a marker section to control the presence of // PT_GNU_STACK segment in outputs. Since the presence of the segment @@ -296,39 +396,23 @@ elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) { return &InputSection<ELFT>::Discarded; } - if (Config->StripDebug && Name.startswith(".debug")) + if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) return &InputSection<ELFT>::Discarded; - // A MIPS object file has a special sections that contain register - // usage info, which need to be handled by the linker specially. 
- if (Config->EMachine == EM_MIPS) { - if (Name == ".reginfo") { - MipsReginfo.reset(new MipsReginfoInputSection<ELFT>(this, &Sec)); - return MipsReginfo.get(); - } - if (Name == ".MIPS.options") { - MipsOptions.reset(new MipsOptionsInputSection<ELFT>(this, &Sec)); - return MipsOptions.get(); - } - } - // The linker merges EH (exception handling) frames and creates a // .eh_frame_hdr section for runtime. So we handle them with a special // class. For relocatable outputs, they are just passed through. if (Name == ".eh_frame" && !Config->Relocatable) - return new (EHAlloc.Allocate()) EhInputSection<ELFT>(this, &Sec); + return make<EhInputSection<ELFT>>(this, &Sec, Name); if (shouldMerge(Sec)) - return new (MAlloc.Allocate()) MergeInputSection<ELFT>(this, &Sec); - return new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec); + return make<MergeInputSection<ELFT>>(this, &Sec, Name); + return make<InputSection<ELFT>>(this, &Sec, Name); } template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { - this->initStringTable(); - Elf_Sym_Range Syms = this->getElfSymbols(false); - uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); - SymbolBodies.reserve(NumSymbols); - for (const Elf_Sym &Sym : Syms) + SymbolBodies.reserve(this->Symbols.size()); + for (const Elf_Sym &Sym : this->Symbols) SymbolBodies.push_back(createSymbolBody(&Sym)); } @@ -336,12 +420,23 @@ template <class ELFT> InputSectionBase<ELFT> * elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { uint32_t Index = this->getSectionIndex(Sym); - if (Index == 0) - return nullptr; - if (Index >= Sections.size() || !Sections[Index]) - fatal(getFilename(this) + ": invalid section index: " + Twine(Index)); + if (Index >= Sections.size()) + fatal(toString(this) + ": invalid section index: " + Twine(Index)); InputSectionBase<ELFT> *S = Sections[Index]; - if (S == &InputSectionBase<ELFT>::Discarded) + + // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could + // generate broken 
objects. STT_SECTION/STT_NOTYPE symbols can be + // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections. + // In this case it is fine for section to be null here as we do not + // allocate sections of these types. + if (!S) { + if (Index == 0 || Sym.getType() == STT_SECTION || + Sym.getType() == STT_NOTYPE) + return nullptr; + fatal(toString(this) + ": invalid section index: " + Twine(Index)); + } + + if (S == &InputSection<ELFT>::Discarded) return S; return S->Repl; } @@ -350,11 +445,26 @@ template <class ELFT> SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { int Binding = Sym->getBinding(); InputSectionBase<ELFT> *Sec = getSection(*Sym); + + uint8_t StOther = Sym->st_other; + uint8_t Type = Sym->getType(); + uintX_t Value = Sym->st_value; + uintX_t Size = Sym->st_size; + if (Binding == STB_LOCAL) { + if (Sym->getType() == STT_FILE) + SourceFile = check(Sym->getName(this->StringTable)); + + if (this->StringTable.size() <= Sym->st_name) + fatal(toString(this) + ": invalid symbol name offset"); + + StringRefZ Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) - return new (this->Alloc) - Undefined(Sym->st_name, Sym->st_other, Sym->getType(), this); - return new (this->Alloc) DefinedRegular<ELFT>(*Sym, Sec); + return new (BAlloc) + Undefined(Name, /*IsLocal=*/true, StOther, Type, this); + + return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther, + Type, Value, Size, Sec, this); } StringRef Name = check(Sym->getName(this->StringTable)); @@ -362,33 +472,38 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { switch (Sym->st_shndx) { case SHN_UNDEF: return elf::Symtab<ELFT>::X - ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), - /*CanOmitFromDynSym*/ false, this) + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) ->body(); case SHN_COMMON: + if (Value == 0 || Value >= UINT32_MAX) + fatal(toString(this) + ": 
common symbol '" + Name + + "' has invalid alignment: " + Twine(Value)); return elf::Symtab<ELFT>::X - ->addCommon(Name, Sym->st_size, Sym->st_value, Binding, Sym->st_other, - Sym->getType(), this) + ->addCommon(Name, Size, Value, Binding, StOther, Type, this) ->body(); } switch (Binding) { default: - fatal(getFilename(this) + ": unexpected binding: " + Twine(Binding)); + fatal(toString(this) + ": unexpected binding: " + Twine(Binding)); case STB_GLOBAL: case STB_WEAK: case STB_GNU_UNIQUE: if (Sec == &InputSection<ELFT>::Discarded) return elf::Symtab<ELFT>::X - ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), - /*CanOmitFromDynSym*/ false, this) + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, + /*CanOmitFromDynSym=*/false, this) ->body(); - return elf::Symtab<ELFT>::X->addRegular(Name, *Sym, Sec)->body(); + return elf::Symtab<ELFT>::X + ->addRegular(Name, StOther, Type, Value, Size, Binding, Sec, this) + ->body(); } } template <class ELFT> void ArchiveFile::parse() { - File = check(Archive::create(MB), "failed to parse archive"); + File = check(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) @@ -396,13 +511,14 @@ template <class ELFT> void ArchiveFile::parse() { } // Returns a buffer pointing to a member file containing a given symbol. 
-MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { +std::pair<MemoryBufferRef, uint64_t> +ArchiveFile::getMember(const Archive::Symbol *Sym) { Archive::Child C = check(Sym->getMember(), "could not get the member for symbol " + Sym->getName()); if (!Seen.insert(C.getChildOffset()).second) - return MemoryBufferRef(); + return {MemoryBufferRef(), 0}; MemoryBufferRef Ret = check(C.getMemoryBufferRef(), @@ -412,8 +528,9 @@ MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { if (C.getParent()->isThin() && Driver->Cpio) Driver->Cpio->append(relativeToRoot(check(C.getFullName())), Ret.getBuffer()); - - return Ret; + if (C.getParent()->isThin()) + return {Ret, 0}; + return {Ret, C.getChildOffset()}; } template <class ELFT> @@ -423,32 +540,29 @@ SharedFile<ELFT>::SharedFile(MemoryBufferRef M) template <class ELFT> const typename ELFT::Shdr * SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { - uint32_t Index = this->getSectionIndex(Sym); - if (Index == 0) - return nullptr; - return check(this->ELFObj.getSection(Index)); + return check( + this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX)); } // Partially parse the shared object file so that we can call // getSoName on this object. 
template <class ELFT> void SharedFile<ELFT>::parseSoName() { - typedef typename ELFT::Dyn Elf_Dyn; - typedef typename ELFT::uint uintX_t; const Elf_Shdr *DynamicSec = nullptr; - const ELFFile<ELFT> Obj = this->ELFObj; - for (const Elf_Shdr &Sec : Obj.sections()) { + const ELFFile<ELFT> Obj = this->getObj(); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { switch (Sec.sh_type) { default: continue; case SHT_DYNSYM: - this->Symtab = &Sec; + this->initSymtab(Sections, &Sec); break; case SHT_DYNAMIC: DynamicSec = &Sec; break; case SHT_SYMTAB_SHNDX: - this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec)); + this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, Sections)); break; case SHT_GNU_versym: this->VersymSec = &Sec; @@ -459,20 +573,25 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() { } } - this->initStringTable(); + if (this->VersymSec && this->Symbols.empty()) + error("SHT_GNU_versym should be associated with symbol table"); + + // DSOs are identified by soname, and they usually contain + // DT_SONAME tag in their header. But if they are missing, + // filenames are used as default sonames. 
SoName = sys::path::filename(this->getName()); if (!DynamicSec) return; - auto *Begin = - reinterpret_cast<const Elf_Dyn *>(Obj.base() + DynamicSec->sh_offset); - const Elf_Dyn *End = Begin + DynamicSec->sh_size / sizeof(Elf_Dyn); - for (const Elf_Dyn &Dyn : make_range(Begin, End)) { + ArrayRef<Elf_Dyn> Arr = + check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), + toString(this) + ": getSectionContentsAsArray failed"); + for (const Elf_Dyn &Dyn : Arr) { if (Dyn.d_tag == DT_SONAME) { uintX_t Val = Dyn.getVal(); if (Val >= this->StringTable.size()) - fatal(getFilename(this) + ": invalid DT_SONAME entry"); + fatal(toString(this) + ": invalid DT_SONAME entry"); SoName = StringRef(this->StringTable.data() + Val); return; } @@ -494,9 +613,9 @@ SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { return Verdefs; // The location of the first global versym entry. - Versym = reinterpret_cast<const Elf_Versym *>(this->ELFObj.base() + - VersymSec->sh_offset) + - this->Symtab->sh_info; + const char *Base = this->MB.getBuffer().data(); + Versym = reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) + + this->FirstNonLocal; // We cannot determine the largest verdef identifier without inspecting // every Elf_Verdef, but both bfd and gold assign verdef identifiers @@ -507,7 +626,7 @@ SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) { // Build the Verdefs array by following the chain of Elf_Verdef objects // from the start of the .gnu.version_d section. 
- const uint8_t *Verdef = this->ELFObj.base() + VerdefSec->sh_offset; + const char *Verdef = Base + VerdefSec->sh_offset; for (unsigned I = 0; I != VerdefCount; ++I) { auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef); Verdef += CurVerdef->vd_next; @@ -526,7 +645,7 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() { const Elf_Versym *Versym = nullptr; std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym); - Elf_Sym_Range Syms = this->getElfSymbols(true); + Elf_Sym_Range Syms = this->getGlobalSymbols(); for (const Elf_Sym &Sym : Syms) { unsigned VersymIndex = 0; if (Versym) { @@ -552,18 +671,16 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() { } } -static ELFKind getELFKind(MemoryBufferRef MB) { - std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context); - Triple TheTriple(TripleStr); - bool Is64Bits = TheTriple.isArch64Bit(); - if (TheTriple.isLittleEndian()) - return Is64Bits ? ELF64LEKind : ELF32LEKind; - return Is64Bits ? ELF64BEKind : ELF32BEKind; +static ELFKind getBitcodeELFKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + if (T.isLittleEndian()) + return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; + return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind; } -static uint8_t getMachineKind(MemoryBufferRef MB) { - std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context); - switch (Triple(TripleStr).getArch()) { +static uint8_t getBitcodeMachineKind(MemoryBufferRef MB) { + Triple T(check(getBitcodeTargetTriple(MB))); + switch (T.getArch()) { case Triple::aarch64: return EM_AARCH64; case Triple::arm: @@ -578,23 +695,22 @@ static uint8_t getMachineKind(MemoryBufferRef MB) { case Triple::ppc64: return EM_PPC64; case Triple::x86: - return EM_386; + return T.isOSIAMCU() ? 
EM_IAMCU : EM_386; case Triple::x86_64: return EM_X86_64; default: fatal(MB.getBufferIdentifier() + - ": could not infer e_machine from bitcode target triple " + - TripleStr); + ": could not infer e_machine from bitcode target triple " + T.str()); } } BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) { - EKind = getELFKind(MB); - EMachine = getMachineKind(MB); + EKind = getBitcodeELFKind(MB); + EMachine = getBitcodeMachineKind(MB); } -static uint8_t getGvVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { +static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { + switch (GvVisibility) { case GlobalValue::DefaultVisibility: return STV_DEFAULT; case GlobalValue::HiddenVisibility: @@ -606,124 +722,134 @@ static uint8_t getGvVisibility(const GlobalValue *GV) { } template <class ELFT> -Symbol *BitcodeFile::createSymbol(const DenseSet<const Comdat *> &KeptComdats, - const IRObjectFile &Obj, - const BasicSymbolRef &Sym) { - const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl()); - - SmallString<64> Name; - raw_svector_ostream OS(Name); - Sym.printName(OS); - StringRef NameRef = Saver.save(StringRef(Name)); - - uint32_t Flags = Sym.getFlags(); - bool IsWeak = Flags & BasicSymbolRef::SF_Weak; - uint32_t Binding = IsWeak ? STB_WEAK : STB_GLOBAL; - - uint8_t Type = STT_NOTYPE; - bool CanOmitFromDynSym = false; - // FIXME: Expose a thread-local flag for module asm symbols. - if (GV) { - if (GV->isThreadLocal()) - Type = STT_TLS; - CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV); - } - - uint8_t Visibility; - if (GV) - Visibility = getGvVisibility(GV); - else - // FIXME: Set SF_Hidden flag correctly for module asm symbols, and expose - // protected visibility. 
- Visibility = STV_DEFAULT; - - if (GV) - if (const Comdat *C = GV->getComdat()) - if (!KeptComdats.count(C)) - return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, - CanOmitFromDynSym, this); +static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, + const lto::InputFile::Symbol &ObjSym, + BitcodeFile *F) { + StringRef NameRef = Saver.save(ObjSym.getName()); + uint32_t Flags = ObjSym.getFlags(); + uint32_t Binding = (Flags & BasicSymbolRef::SF_Weak) ? STB_WEAK : STB_GLOBAL; + + uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; + uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); + bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); + + int C = check(ObjSym.getComdatIndex()); + if (C != -1 && !KeptComdats[C]) + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); - const Module &M = Obj.getModule(); if (Flags & BasicSymbolRef::SF_Undefined) - return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, - CanOmitFromDynSym, this); - if (Flags & BasicSymbolRef::SF_Common) { - // FIXME: Set SF_Common flag correctly for module asm symbols, and expose - // size and alignment. 
- assert(GV); - const DataLayout &DL = M.getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); - return Symtab<ELFT>::X->addCommon(NameRef, Size, GV->getAlignment(), - Binding, Visibility, STT_OBJECT, this); - } - return Symtab<ELFT>::X->addBitcode(NameRef, IsWeak, Visibility, Type, - CanOmitFromDynSym, this); -} + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, + Visibility, Type, CanOmitFromDynSym, + F); + + if (Flags & BasicSymbolRef::SF_Common) + return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), + ObjSym.getCommonAlignment(), Binding, + Visibility, STT_OBJECT, F); -bool BitcodeFile::shouldSkip(uint32_t Flags) { - return !(Flags & BasicSymbolRef::SF_Global) || - (Flags & BasicSymbolRef::SF_FormatSpecific); + return Symtab<ELFT>::X->addBitcode(NameRef, Binding, Visibility, Type, + CanOmitFromDynSym, F); } template <class ELFT> -void BitcodeFile::parse(DenseSet<StringRef> &ComdatGroups) { - Obj = check(IRObjectFile::create(MB, Driver->Context)); - const Module &M = Obj->getModule(); - - DenseSet<const Comdat *> KeptComdats; - for (const auto &P : M.getComdatSymbolTable()) { - StringRef N = Saver.save(P.first()); - if (ComdatGroups.insert(N).second) - KeptComdats.insert(&P.second); +void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { + + // Here we pass a new MemoryBufferRef which is identified by ArchiveName + // (the fully resolved path of the archive) + member name + offset of the + // member in the archive. + // ThinLTO uses the MemoryBufferRef identifier to access its internal + // data structures and if two archives define two members with the same name, + // this causes a collision which result in only one of the objects being + // taken into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). 
+ Obj = check(lto::InputFile::create(MemoryBufferRef( + MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier() + + utostr(OffsetInArchive))))); + + std::vector<bool> KeptComdats; + for (StringRef S : Obj->getComdatTable()) { + StringRef N = Saver.save(S); + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); } - for (const BasicSymbolRef &Sym : Obj->symbols()) - if (!shouldSkip(Sym.getFlags())) - Symbols.push_back(createSymbol<ELFT>(KeptComdats, *Obj, Sym)); + for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) + Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); } template <template <class> class T> -static std::unique_ptr<InputFile> createELFFile(MemoryBufferRef MB) { +static InputFile *createELFFile(MemoryBufferRef MB) { unsigned char Size; unsigned char Endian; std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) - fatal("invalid data encoding: " + MB.getBufferIdentifier()); + fatal(MB.getBufferIdentifier() + ": invalid data encoding"); + + size_t BufSize = MB.getBuffer().size(); + if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || + (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) + fatal(MB.getBufferIdentifier() + ": file is too short"); - std::unique_ptr<InputFile> Obj; + InputFile *Obj; if (Size == ELFCLASS32 && Endian == ELFDATA2LSB) - Obj.reset(new T<ELF32LE>(MB)); + Obj = make<T<ELF32LE>>(MB); else if (Size == ELFCLASS32 && Endian == ELFDATA2MSB) - Obj.reset(new T<ELF32BE>(MB)); + Obj = make<T<ELF32BE>>(MB); else if (Size == ELFCLASS64 && Endian == ELFDATA2LSB) - Obj.reset(new T<ELF64LE>(MB)); + Obj = make<T<ELF64LE>>(MB); else if (Size == ELFCLASS64 && Endian == ELFDATA2MSB) - Obj.reset(new T<ELF64BE>(MB)); + Obj = make<T<ELF64BE>>(MB); else - fatal("invalid file class: " + MB.getBufferIdentifier()); + fatal(MB.getBufferIdentifier() + ": invalid file class"); if (!Config->FirstElf) - Config->FirstElf = Obj.get(); + 
Config->FirstElf = Obj; return Obj; } +template <class ELFT> void BinaryFile::parse() { + StringRef Buf = MB.getBuffer(); + ArrayRef<uint8_t> Data = + makeArrayRef<uint8_t>((const uint8_t *)Buf.data(), Buf.size()); + + std::string Filename = MB.getBufferIdentifier(); + std::transform(Filename.begin(), Filename.end(), Filename.begin(), + [](char C) { return isalnum(C) ? C : '_'; }); + Filename = "_binary_" + Filename; + StringRef StartName = Saver.save(Twine(Filename) + "_start"); + StringRef EndName = Saver.save(Twine(Filename) + "_end"); + StringRef SizeName = Saver.save(Twine(Filename) + "_size"); + + auto *Section = + make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 8, Data, ".data"); + Sections.push_back(Section); + + elf::Symtab<ELFT>::X->addRegular(StartName, STV_DEFAULT, STT_OBJECT, 0, 0, + STB_GLOBAL, Section, nullptr); + elf::Symtab<ELFT>::X->addRegular(EndName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, Section, + nullptr); + elf::Symtab<ELFT>::X->addRegular(SizeName, STV_DEFAULT, STT_OBJECT, + Data.size(), 0, STB_GLOBAL, nullptr, + nullptr); +} + static bool isBitcode(MemoryBufferRef MB) { using namespace sys::fs; return identify_magic(MB.getBuffer()) == file_magic::bitcode; } -std::unique_ptr<InputFile> elf::createObjectFile(MemoryBufferRef MB, - StringRef ArchiveName) { - std::unique_ptr<InputFile> F; - if (isBitcode(MB)) - F.reset(new BitcodeFile(MB)); - else - F = createELFFile<ObjectFile>(MB); +InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, + uint64_t OffsetInArchive) { + InputFile *F = + isBitcode(MB) ? 
make<BitcodeFile>(MB) : createELFFile<ObjectFile>(MB); F->ArchiveName = ArchiveName; + F->OffsetInArchive = OffsetInArchive; return F; } -std::unique_ptr<InputFile> elf::createSharedFile(MemoryBufferRef MB) { +InputFile *elf::createSharedFile(MemoryBufferRef MB) { return createELFFile<SharedFile>(MB); } @@ -734,8 +860,7 @@ MemoryBufferRef LazyObjectFile::getBuffer() { return MB; } -template <class ELFT> -void LazyObjectFile::parse() { +template <class ELFT> void LazyObjectFile::parse() { for (StringRef Sym : getSymbols()) Symtab<ELFT>::X->addLazyObject(Sym, *this); } @@ -745,13 +870,14 @@ template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::SymRange Elf_Sym_Range; - const ELFFile<ELFT> Obj = createELFObj<ELFT>(this->MB); - for (const Elf_Shdr &Sec : Obj.sections()) { + const ELFFile<ELFT> Obj(this->MB.getBuffer()); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + for (const Elf_Shdr &Sec : Sections) { if (Sec.sh_type != SHT_SYMTAB) continue; - Elf_Sym_Range Syms = Obj.symbols(&Sec); + Elf_Sym_Range Syms = check(Obj.symbols(&Sec)); uint32_t FirstNonLocal = Sec.sh_info; - StringRef StringTable = check(Obj.getStringTableForSymtab(Sec)); + StringRef StringTable = check(Obj.getStringTableForSymtab(Sec, Sections)); std::vector<StringRef> V; for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) if (Sym.st_shndx != SHN_UNDEF) @@ -762,21 +888,11 @@ template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { } std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { - LLVMContext Context; - std::unique_ptr<IRObjectFile> Obj = - check(IRObjectFile::create(this->MB, Context)); + std::unique_ptr<lto::InputFile> Obj = check(lto::InputFile::create(this->MB)); std::vector<StringRef> V; - for (const BasicSymbolRef &Sym : Obj->symbols()) { - uint32_t Flags = Sym.getFlags(); - if (BitcodeFile::shouldSkip(Flags)) - continue; - if (Flags & BasicSymbolRef::SF_Undefined) - 
continue; - SmallString<64> Name; - raw_svector_ostream OS(Name); - Sym.printName(OS); - V.push_back(Saver.save(StringRef(Name))); - } + for (const lto::InputFile::Symbol &Sym : Obj->symbols()) + if (!(Sym.getFlags() & BasicSymbolRef::SF_Undefined)) + V.push_back(Saver.save(Sym.getName())); return V; } @@ -803,10 +919,10 @@ template void ArchiveFile::parse<ELF32BE>(); template void ArchiveFile::parse<ELF64LE>(); template void ArchiveFile::parse<ELF64BE>(); -template void BitcodeFile::parse<ELF32LE>(DenseSet<StringRef> &); -template void BitcodeFile::parse<ELF32BE>(DenseSet<StringRef> &); -template void BitcodeFile::parse<ELF64LE>(DenseSet<StringRef> &); -template void BitcodeFile::parse<ELF64BE>(DenseSet<StringRef> &); +template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &); +template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &); template void LazyObjectFile::parse<ELF32LE>(); template void LazyObjectFile::parse<ELF32BE>(); @@ -827,3 +943,8 @@ template class elf::SharedFile<ELF32LE>; template class elf::SharedFile<ELF32BE>; template class elf::SharedFile<ELF64LE>; template class elf::SharedFile<ELF64BE>; + +template void BinaryFile::parse<ELF32LE>(); +template void BinaryFile::parse<ELF32BE>(); +template void BinaryFile::parse<ELF64LE>(); +template void BinaryFile::parse<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.h b/contrib/llvm/tools/lld/ELF/InputFiles.h index 79cb751494b3..aba1d71379b0 100644 --- a/contrib/llvm/tools/lld/ELF/InputFiles.h +++ b/contrib/llvm/tools/lld/ELF/InputFiles.h @@ -16,16 +16,24 @@ #include "Symbols.h" #include "lld/Core/LLVM.h" +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Comdat.h" #include "llvm/Object/Archive.h" #include 
"llvm/Object/ELF.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Support/StringSaver.h" #include <map> +namespace llvm { +class DWARFDebugLine; +namespace lto { +class InputFile; +} +} + namespace lld { namespace elf { @@ -44,6 +52,7 @@ public: LazyObjectKind, ArchiveKind, BitcodeKind, + BinaryKind, }; Kind kind() const { return FileKind; } @@ -56,10 +65,17 @@ public: // string for creating error messages. StringRef ArchiveName; + // If this file is in an archive, the member contains the offset of + // the file in the archive. Otherwise, it's just zero. We store this + // field so that we can pass it to lib/LTO in order to disambiguate + // between objects. + uint64_t OffsetInArchive; + // If this is an architecture-specific file, the following members // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. ELFKind EKind = ELFNoneKind; uint16_t EMachine = llvm::ELF::EM_NONE; + uint8_t OSABI = 0; protected: InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} @@ -69,7 +85,7 @@ private: }; // Returns "(internal)", "foo.a(bar.o)" or "baz.o". 
-std::string getFilename(const InputFile *F); +std::string toString(const InputFile *F); template <typename ELFT> class ELFFileBase : public InputFile { public: @@ -84,37 +100,37 @@ public: return K == ObjectKind || K == SharedKind; } - const llvm::object::ELFFile<ELFT> &getObj() const { return ELFObj; } - llvm::object::ELFFile<ELFT> &getObj() { return ELFObj; } - - uint8_t getOSABI() const { - return getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; + llvm::object::ELFFile<ELFT> getObj() const { + return llvm::object::ELFFile<ELFT>(MB.getBuffer()); } StringRef getStringTable() const { return StringTable; } uint32_t getSectionIndex(const Elf_Sym &Sym) const; - Elf_Sym_Range getElfSymbols(bool OnlyGlobals); + Elf_Sym_Range getGlobalSymbols(); protected: - llvm::object::ELFFile<ELFT> ELFObj; - const Elf_Shdr *Symtab = nullptr; + ArrayRef<Elf_Sym> Symbols; + uint32_t FirstNonLocal = 0; ArrayRef<Elf_Word> SymtabSHNDX; StringRef StringTable; - void initStringTable(); + void initSymtab(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr *Symtab); }; // .o file. 
template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::SymRange Elf_Sym_Range; typedef typename ELFT::Word Elf_Word; typedef typename ELFT::uint uintX_t; - StringRef getShtGroupSignature(const Elf_Shdr &Sec); + StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, + const Elf_Shdr &Sec); ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec); public: @@ -127,40 +143,49 @@ public: ArrayRef<SymbolBody *> getNonLocalSymbols(); explicit ObjectFile(MemoryBufferRef M); - void parse(llvm::DenseSet<StringRef> &ComdatGroups); + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { + if (SymbolIndex >= SymbolBodies.size()) + fatal(toString(this) + ": invalid symbol index"); return *SymbolBodies[SymbolIndex]; } - template <typename RelT> SymbolBody &getRelocTargetSym(const RelT &Rel) const { + template <typename RelT> + SymbolBody &getRelocTargetSym(const RelT &Rel) const { uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); return getSymbolBody(SymIndex); } - const Elf_Shdr *getSymbolTable() const { return this->Symtab; }; + // Returns source line information for a given offset. + // If no information is available, returns "". + std::string getLineInfo(InputSectionBase<ELFT> *S, uintX_t Offset); - // Get MIPS GP0 value defined by this file. This value represents the gp value + // MIPS GP0 value defined by this file. This value represents the gp value // used to create the relocatable object and required to support // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 
- uint32_t getMipsGp0() const; + uint32_t MipsGp0 = 0; // The number is the offset in the string table. It will be used as the // st_name of the symbol. std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms; - // SymbolBodies and Thunks for sections in this file are allocated - // using this buffer. - llvm::BumpPtrAllocator Alloc; + // Name of source file obtained from STT_FILE symbol value, + // or empty string if there is no such symbol in object file + // symbol table. + StringRef SourceFile; private: - void initializeSections(llvm::DenseSet<StringRef> &ComdatGroups); + void + initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); void initializeSymbols(); + void initializeDwarfLine(); InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec); - InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec); + InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable); bool shouldMerge(const Elf_Shdr &Sec); SymbolBody *createSymbolBody(const Elf_Sym *Sym); @@ -171,14 +196,11 @@ private: // List of all symbols referenced or defined by this file. std::vector<SymbolBody *> SymbolBodies; - // MIPS .reginfo section defined by this file. - std::unique_ptr<MipsReginfoInputSection<ELFT>> MipsReginfo; - // MIPS .MIPS.options section defined by this file. - std::unique_ptr<MipsOptionsInputSection<ELFT>> MipsOptions; - - llvm::SpecificBumpPtrAllocator<InputSection<ELFT>> IAlloc; - llvm::SpecificBumpPtrAllocator<MergeInputSection<ELFT>> MAlloc; - llvm::SpecificBumpPtrAllocator<EhInputSection<ELFT>> EHAlloc; + // Debugging information to retrieve source file and line for error + // reporting. Linker may find reasonable number of errors in a + // single object file, so we cache debugging information in order to + // parse it only once for each object file we link. 
+ std::unique_ptr<llvm::DWARFDebugLine> DwarfLine; }; // LazyObjectFile is analogous to ArchiveFile in the sense that @@ -204,8 +226,6 @@ private: template <class ELFT> std::vector<StringRef> getElfSymbols(); std::vector<StringRef> getBitcodeSymbols(); - llvm::BumpPtrAllocator Alloc; - llvm::StringSaver Saver{Alloc}; bool Seen = false; }; @@ -216,10 +236,11 @@ public: static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } template <class ELFT> void parse(); - // Returns a memory buffer for a given symbol. An empty memory buffer + // Returns a memory buffer for a given symbol and the offset in the archive + // for the member. An empty memory buffer and an offset of zero // is returned if we have already returned the same memory buffer. // (So that we don't instantiate same members more than once.) - MemoryBufferRef getMember(const Archive::Symbol *Sym); + std::pair<MemoryBufferRef, uint64_t> getMember(const Archive::Symbol *Sym); private: std::unique_ptr<Archive> File; @@ -231,30 +252,25 @@ public: explicit BitcodeFile(MemoryBufferRef M); static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } template <class ELFT> - void parse(llvm::DenseSet<StringRef> &ComdatGroups); + void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); ArrayRef<Symbol *> getSymbols() { return Symbols; } - static bool shouldSkip(uint32_t Flags); - std::unique_ptr<llvm::object::IRObjectFile> Obj; + std::unique_ptr<llvm::lto::InputFile> Obj; private: std::vector<Symbol *> Symbols; - llvm::BumpPtrAllocator Alloc; - llvm::StringSaver Saver{Alloc}; - template <class ELFT> - Symbol *createSymbol(const llvm::DenseSet<const llvm::Comdat *> &KeptComdats, - const llvm::object::IRObjectFile &Obj, - const llvm::object::BasicSymbolRef &Sym); }; // .so file. 
template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { typedef ELFFileBase<ELFT> Base; + typedef typename ELFT::Dyn Elf_Dyn; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Word Elf_Word; typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::Versym Elf_Versym; typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Versym Elf_Versym; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; std::vector<StringRef> Undefs; StringRef SoName; @@ -294,9 +310,20 @@ public: bool isNeeded() const { return !AsNeeded || IsUsed; } }; -std::unique_ptr<InputFile> createObjectFile(MemoryBufferRef MB, - StringRef ArchiveName = ""); -std::unique_ptr<InputFile> createSharedFile(MemoryBufferRef MB); +class BinaryFile : public InputFile { +public: + explicit BinaryFile(MemoryBufferRef M) : InputFile(BinaryKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BinaryKind; } + template <class ELFT> void parse(); + ArrayRef<InputSectionData *> getSections() const { return Sections; } + +private: + std::vector<InputSectionData *> Sections; +}; + +InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "", + uint64_t OffsetInArchive = 0); +InputFile *createSharedFile(MemoryBufferRef MB); } // namespace elf } // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/InputSection.cpp b/contrib/llvm/tools/lld/ELF/InputSection.cpp index 6564e7995a89..805e51dab507 100644 --- a/contrib/llvm/tools/lld/ELF/InputSection.cpp +++ b/contrib/llvm/tools/lld/ELF/InputSection.cpp @@ -13,103 +13,187 @@ #include "Error.h" #include "InputFiles.h" #include "LinkerScript.h" +#include "Memory.h" #include "OutputSections.h" +#include "Relocations.h" +#include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" - #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" +#include <mutex> using namespace llvm; using namespace llvm::ELF; using 
namespace llvm::object; +using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -template <class ELFT> bool elf::isDiscarded(InputSectionBase<ELFT> *S) { - return !S || S == &InputSection<ELFT>::Discarded || !S->Live || - Script<ELFT>::X->isDiscarded(S); +// Returns a string to construct an error message. +template <class ELFT> +std::string elf::toString(const InputSectionBase<ELFT> *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); +} + +template <class ELFT> +static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File, + const typename ELFT::Shdr *Hdr) { + if (!File || Hdr->sh_type == SHT_NOBITS) + return makeArrayRef<uint8_t>(nullptr, Hdr->sh_size); + return check(File->getObj().getSectionContents(Hdr)); } template <class ELFT> InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, - const Elf_Shdr *Header, + uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, + uint32_t Info, uintX_t Addralign, + ArrayRef<uint8_t> Data, StringRef Name, Kind SectionKind) - : Header(Header), File(File), SectionKind(SectionKind), Repl(this), - Compressed(Header->sh_flags & SHF_COMPRESSED) { - // The garbage collector sets sections' Live bits. - // If GC is disabled, all sections are considered live by default. - Live = !Config->GcSections; + : InputSectionData(SectionKind, Name, Data, + !Config->GcSections || !(Flags & SHF_ALLOC)), + File(File), Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), + Info(Info), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; // The ELF spec states that a value of 0 means the section has // no alignment constraits. 
- Alignment = std::max<uintX_t>(Header->sh_addralign, 1); + uint64_t V = std::max<uint64_t>(Addralign, 1); + if (!isPowerOf2_64(V)) + fatal(toString(File) + ": section sh_addralign is not a power of 2"); + + // We reject object files having insanely large alignments even though + // they are allowed by the spec. I think 4GB is a reasonable limitation. + // We might want to relax this in the future. + if (V > UINT32_MAX) + fatal(toString(File) + ": section sh_addralign is too large"); + Alignment = V; + + // If it is not a mergeable section, overwrite the flag so that the flag + // is consistent with the class. This inconsistency could occur when + // string merging is disabled using -O0 flag. + if (!Config->Relocatable && !isa<MergeInputSection<ELFT>>(this)) + this->Flags &= ~(SHF_MERGE | SHF_STRINGS); +} + +template <class ELFT> +InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, + const Elf_Shdr *Hdr, StringRef Name, + Kind SectionKind) + : InputSectionBase(File, Hdr->sh_flags & ~SHF_INFO_LINK, Hdr->sh_type, + Hdr->sh_entsize, Hdr->sh_link, Hdr->sh_info, + Hdr->sh_addralign, getSectionContents(File, Hdr), Name, + SectionKind) { + this->Offset = Hdr->sh_offset; } template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const { + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) + return S->getSize(); + if (auto *D = dyn_cast<InputSection<ELFT>>(this)) if (D->getThunksSize() > 0) return D->getThunkOff() + D->getThunksSize(); - return Header->sh_size; -} -template <class ELFT> StringRef InputSectionBase<ELFT>::getSectionName() const { - return check(File->getObj().getSectionName(this->Header)); + return Data.size(); } -template <class ELFT> -ArrayRef<uint8_t> InputSectionBase<ELFT>::getSectionData() const { - if (Compressed) - return ArrayRef<uint8_t>((const uint8_t *)Uncompressed.data(), - Uncompressed.size()); - return check(this->File->getObj().getSectionContents(this->Header)); +// Returns a string for an error message. 
+template <class SectionT> static std::string getName(SectionT *Sec) { + return (Sec->getFile()->getName() + ":(" + Sec->Name + ")").str(); } template <class ELFT> typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const { - switch (SectionKind) { + switch (kind()) { case Regular: return cast<InputSection<ELFT>>(this)->OutSecOff + Offset; + case Synthetic: + // For synthetic sections we treat offset -1 as the end of the section. + // The same approach is used for synthetic symbols (DefinedSynthetic). + return cast<InputSection<ELFT>>(this)->OutSecOff + + (Offset == uintX_t(-1) ? getSize() : Offset); case EHFrame: - return cast<EhInputSection<ELFT>>(this)->getOffset(Offset); + // The file crtbeginT.o has relocations pointing to the start of an empty + // .eh_frame that is known to be the first in the link. It does that to + // identify the start of the output .eh_frame. + return Offset; case Merge: return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset); - case MipsReginfo: - case MipsOptions: - // MIPS .reginfo and .MIPS.options sections are consumed by the linker, - // and the linker produces a single output section. It is possible that - // input files contain section symbol points to the corresponding input - // section. Redirect it to the produced output section. - if (Offset != 0) - fatal("Unsupported reference to the middle of '" + getSectionName() + - "' section"); - return this->OutSec->getVA(); } llvm_unreachable("invalid section kind"); } -template <class ELFT> void InputSectionBase<ELFT>::uncompress() { - if (!zlib::isAvailable()) - fatal("build lld with zlib to enable compressed sections support"); +template <class ELFT> bool InputSectionBase<ELFT>::isCompressed() const { + return (Flags & SHF_COMPRESSED) || Name.startswith(".zdebug"); +} - // A compressed section consists of a header of Elf_Chdr type - // followed by compressed data. 
- ArrayRef<uint8_t> Data = - check(this->File->getObj().getSectionContents(this->Header)); +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) { + // Compressed section with Elf_Chdr is the ELF standard. if (Data.size() < sizeof(Elf_Chdr)) - fatal("corrupt compressed section"); - + fatal(toString(this) + ": corrupted compressed section"); auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data()); - Data = Data.slice(sizeof(Elf_Chdr)); - if (Hdr->ch_type != ELFCOMPRESS_ZLIB) - fatal("unsupported compression type"); + fatal(toString(this) + ": unsupported compression type"); + return {Data.slice(sizeof(*Hdr)), Hdr->ch_size}; +} - StringRef Buf((const char *)Data.data(), Data.size()); - if (zlib::uncompress(Buf, Uncompressed, Hdr->ch_size) != zlib::StatusOK) - fatal("error uncompressing section"); +// Returns compressed data and its size when uncompressed. +template <class ELFT> +std::pair<ArrayRef<uint8_t>, uint64_t> +InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) { + // Compressed sections without Elf_Chdr header contain this header + // instead. This is a GNU extension. + struct ZlibHeader { + char Magic[4]; // Should be "ZLIB" + char Size[8]; // Uncompressed size in big-endian + }; + + if (Data.size() < sizeof(ZlibHeader)) + fatal(toString(this) + ": corrupted compressed section"); + auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data()); + if (memcmp(Hdr->Magic, "ZLIB", 4)) + fatal(toString(this) + ": broken ZLIB-compressed section"); + return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)}; +} + +// Uncompress section contents. Note that this function is called +// from parallel_for_each, so it must be thread-safe. 
+template <class ELFT> void InputSectionBase<ELFT>::uncompress() { + if (!zlib::isAvailable()) + fatal(toString(this) + + ": build lld with zlib to enable compressed sections support"); + + // This section is compressed. Here we decompress it. Ideally, all + // compressed sections have SHF_COMPRESSED bit and their contents + // start with headers of Elf_Chdr type. However, sections whose + // names start with ".zdebug_" don't have the bit and contains a raw + // ZLIB-compressed data (which is a bad thing because section names + // shouldn't be significant in ELF.) We need to be able to read both. + ArrayRef<uint8_t> Buf; // Compressed data + size_t Size; // Uncompressed size + if (Flags & SHF_COMPRESSED) + std::tie(Buf, Size) = getElfCompressedData(Data); + else + std::tie(Buf, Size) = getRawCompressedData(Data); + + // Uncompress Buf. + char *OutputBuf; + { + static std::mutex Mu; + std::lock_guard<std::mutex> Lock(Mu); + OutputBuf = BAlloc.Allocate<char>(Size); + } + if (zlib::uncompress(toStringRef(Buf), OutputBuf, Size) != zlib::StatusOK) + fatal(toString(this) + ": error while uncompressing section"); + Data = ArrayRef<uint8_t>((uint8_t *)OutputBuf, Size); } template <class ELFT> @@ -119,29 +203,71 @@ InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const { } template <class ELFT> +InputSectionBase<ELFT> *InputSectionBase<ELFT>::getLinkOrderDep() const { + if ((Flags & SHF_LINK_ORDER) && Link != 0) + return getFile()->getSections()[Link]; + return nullptr; +} + +// Returns a source location string. Used to construct an error message. +template <class ELFT> +std::string InputSectionBase<ELFT>::getLocation(typename ELFT::uint Offset) { + // First check if we can get desired values from debugging information. + std::string LineInfo = File->getLineInfo(this, Offset); + if (!LineInfo.empty()) + return LineInfo; + + // File->SourceFile contains STT_FILE symbol that contains a + // source file name. If it's missing, we use an object file name. 
+ std::string SrcFile = File->SourceFile; + if (SrcFile.empty()) + SrcFile = toString(File); + + // Find a function symbol that encloses a given location. + for (SymbolBody *B : File->getSymbols()) + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B)) + if (D->Section == this && D->Type == STT_FUNC) + if (D->Value <= Offset && Offset < D->Value + D->Size) + return SrcFile + ":(function " + toString(*D) + ")"; + + // If there's no symbol, print out the offset in the section. + return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); +} + +template <class ELFT> +InputSection<ELFT>::InputSection() : InputSectionBase<ELFT>() {} + +template <class ELFT> +InputSection<ELFT>::InputSection(uintX_t Flags, uint32_t Type, + uintX_t Addralign, ArrayRef<uint8_t> Data, + StringRef Name, Kind K) + : InputSectionBase<ELFT>(nullptr, Flags, Type, + /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Addralign, + Data, Name, K) {} + +template <class ELFT> InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header) - : InputSectionBase<ELFT>(F, Header, Base::Regular) {} + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, Base::Regular) {} template <class ELFT> -bool InputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == Base::Regular; +bool InputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == Base::Regular || S->kind() == Base::Synthetic; } template <class ELFT> InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() { - assert(this->Header->sh_type == SHT_RELA || this->Header->sh_type == SHT_REL); + assert(this->Type == SHT_RELA || this->Type == SHT_REL); ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections(); - return Sections[this->Header->sh_info]; + return Sections[this->Info]; } -template <class ELFT> -void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) { +template <class ELFT> void InputSection<ELFT>::addThunk(const Thunk<ELFT> 
*T) { Thunks.push_back(T); } template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const { - return this->Header->sh_size; + return this->Data.size(); } template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const { @@ -163,35 +289,62 @@ void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { uint32_t Type = Rel.getType(Config->Mips64EL); SymbolBody &Body = this->File->getRelocTargetSym(Rel); - RelTy *P = reinterpret_cast<RelTy *>(Buf); + Elf_Rela *P = reinterpret_cast<Elf_Rela *>(Buf); Buf += sizeof(RelTy); + if (Config->Rela) + P->r_addend = getAddend<ELFT>(Rel); P->r_offset = RelocatedSection->getOffset(Rel.r_offset); P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL); } } -// Page(Expr) is the page address of the expression Expr, defined -// as (Expr & ~0xFFF). (This applies even if the machine page size -// supported by the platform has a different value.) -static uint64_t getAArch64Page(uint64_t Expr) { - return Expr & (~static_cast<uint64_t>(0xFFF)); +static uint32_t getARMUndefinedRelativeWeakVA(uint32_t Type, uint32_t A, + uint32_t P) { + switch (Type) { + case R_ARM_THM_JUMP11: + return P + 2; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_PREL31: + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + return P + 4; + case R_ARM_THM_CALL: + // We don't want an interworking BLX to ARM + return P + 5; + default: + return A; + } } -template <class ELFT> -static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, - typename ELFT::uint P, - const SymbolBody &Body, RelExpr Expr) { - typedef typename ELFT::uint uintX_t; +static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, + uint64_t P) { + switch (Type) { + case R_AARCH64_CALL26: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_TSTBR14: + return P + 4; + default: + return A; + } +} +template <class ELFT> +static typename ELFT::uint 
+getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, + const SymbolBody &Body, RelExpr Expr) { switch (Expr) { case R_HINT: + case R_TLSDESC_CALL: llvm_unreachable("cannot relocate hint relocs"); case R_TLSLD: - return Out<ELFT>::Got->getTlsIndexOff() + A - - Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t); + return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize(); case R_TLSLD_PC: - return Out<ELFT>::Got->getTlsIndexVA() + A - P; + return In<ELFT>::Got->getTlsIndexVA() + A - P; case R_THUNK_ABS: return Body.getThunkVA<ELFT>() + A; case R_THUNK_PC: @@ -200,14 +353,14 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, case R_PPC_TOC: return getPPC64TocBase() + A; case R_TLSGD: - return Out<ELFT>::Got->getGlobalDynOffset(Body) + A - - Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t); + return In<ELFT>::Got->getGlobalDynOffset(Body) + A - + In<ELFT>::Got->getSize(); case R_TLSGD_PC: - return Out<ELFT>::Got->getGlobalDynAddr(Body) + A - P; + return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P; case R_TLSDESC: - return Out<ELFT>::Got->getGlobalDynAddr(Body) + A; + return In<ELFT>::Got->getGlobalDynAddr(Body) + A; case R_TLSDESC_PAGE: - return getAArch64Page(Out<ELFT>::Got->getGlobalDynAddr(Body) + A) - + return getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) - getAArch64Page(P); case R_PLT: return Body.getPltVA<ELFT>() + A; @@ -217,11 +370,13 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, case R_SIZE: return Body.getSize<ELFT>() + A; case R_GOTREL: - return Body.getVA<ELFT>(A) - Out<ELFT>::Got->getVA(); + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA(); + case R_GOTREL_FROM_END: + return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA() - + In<ELFT>::Got->getSize(); case R_RELAX_TLS_GD_TO_IE_END: case R_GOT_FROM_END: - return Body.getGotOffset<ELFT>() + A - - Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t); + return Body.getGotOffset<ELFT>() + A - 
In<ELFT>::Got->getSize(); case R_RELAX_TLS_GD_TO_IE_ABS: case R_GOT: return Body.getGotVA<ELFT>() + A; @@ -232,11 +387,21 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, case R_GOT_PC: return Body.getGotVA<ELFT>() + A - P; case R_GOTONLY_PC: - return Out<ELFT>::Got->getVA() + A - P; + return In<ELFT>::Got->getVA() + A - P; + case R_GOTONLY_PC_FROM_END: + return In<ELFT>::Got->getVA() + A - P + In<ELFT>::Got->getSize(); case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_IE_TO_LE: case R_RELAX_TLS_GD_TO_LE: case R_TLS: + // A weak undefined TLS symbol resolves to the base of the TLS + // block, i.e. gets a value of zero. If we pass --gc-sections to + // lld and .tbss is not referenced, it gets reclaimed and we don't + // create a TLS program header. Therefore, we resolve this + // statically to zero. + if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) && + Body.symbol()->isWeak()) + return 0; if (Target->TcbSize) return Body.getVA<ELFT>(A) + alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align); @@ -253,18 +418,26 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, // If relocation against MIPS local symbol requires GOT entry, this entry // should be initialized by 'page address'. This address is high 16-bits // of sum the symbol's value and the addend. - return Out<ELFT>::Got->getMipsLocalPageOffset(Body.getVA<ELFT>(A)); + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getPageEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); case R_MIPS_GOT_OFF: + case R_MIPS_GOT_OFF32: // In case of MIPS if a GOT relocation has non-zero addend this addend // should be applied to the GOT entry content not to the GOT entry offset. // That is why we use separate expression type. 
- return Out<ELFT>::Got->getMipsGotOffset(Body, A); + return In<ELFT>::MipsGot->getVA() + + In<ELFT>::MipsGot->getBodyEntryOffset(Body, A) - + In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOTREL: + return Body.getVA<ELFT>(A) - In<ELFT>::MipsGot->getGp(); case R_MIPS_TLSGD: - return Out<ELFT>::Got->getGlobalDynOffset(Body) + - Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset; + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getGlobalDynOffset(Body) - + In<ELFT>::MipsGot->getGp(); case R_MIPS_TLSLD: - return Out<ELFT>::Got->getTlsIndexOff() + - Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset; + return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + + In<ELFT>::MipsGot->getTlsIndexOff() - In<ELFT>::MipsGot->getGp(); case R_PPC_OPD: { uint64_t SymVA = Body.getVA<ELFT>(A); // If we have an undefined weak symbol, we might get here with a symbol @@ -275,8 +448,8 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, if (Out<ELF64BE>::Opd) { // If this is a local call, and we currently have the address of a // function-descriptor, get the underlying code address instead. - uint64_t OpdStart = Out<ELF64BE>::Opd->getVA(); - uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->getSize(); + uint64_t OpdStart = Out<ELF64BE>::Opd->Addr; + uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->Size; bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd; if (InOpd) SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]); @@ -284,10 +457,20 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, return SymVA - P; } case R_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) { + // On ARM and AArch64 a branch to an undefined weak resolves to the + // next instruction, otherwise the place. 
+ if (Config->EMachine == EM_ARM) + return getARMUndefinedRelativeWeakVA(Type, A, P); + if (Config->EMachine == EM_AARCH64) + return getAArch64UndefinedRelativeWeakVA(Type, A, P); + } case R_RELAX_GOT_PC: return Body.getVA<ELFT>(A) - P; case R_PLT_PAGE_PC: case R_PAGE_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) + return getAArch64Page(A); return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P); } llvm_unreachable("Invalid expression"); @@ -303,7 +486,6 @@ static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A, template <class ELFT> template <class RelTy> void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { - const unsigned Bits = sizeof(uintX_t) * 8; for (const RelTy &Rel : Rels) { uint32_t Type = Rel.getType(Config->Mips64EL); uintX_t Offset = this->getOffset(Rel.r_offset); @@ -314,13 +496,15 @@ void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { SymbolBody &Sym = this->File->getRelocTargetSym(Rel); if (Target->getRelExpr(Type, Sym) != R_ABS) { - error(this->getSectionName() + " has non-ABS reloc"); + error(this->getLocation(Offset) + ": has non-ABS reloc"); return; } - uintX_t AddrLoc = this->OutSec->getVA() + Offset; - uint64_t SymVA = - SignExtend64<Bits>(getSymVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS)); + uintX_t AddrLoc = this->OutSec->Addr + Offset; + uint64_t SymVA = 0; + if (!Sym.isTls() || Out<ELFT>::TlsPhdr) + SymVA = SignExtend64<sizeof(uintX_t) * 8>( + getRelocTargetVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS)); Target->relocateOne(BufLoc, Type, SymVA); } } @@ -331,78 +515,80 @@ void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) { // vector only for SHF_ALLOC'ed sections. For other sections, // we handle relocations directly here. 
auto *IS = dyn_cast<InputSection<ELFT>>(this); - if (IS && !(IS->Header->sh_flags & SHF_ALLOC)) { - for (const Elf_Shdr *RelSec : IS->RelocSections) { - if (RelSec->sh_type == SHT_RELA) - IS->relocateNonAlloc(Buf, IS->File->getObj().relas(RelSec)); - else - IS->relocateNonAlloc(Buf, IS->File->getObj().rels(RelSec)); - } + if (IS && !(IS->Flags & SHF_ALLOC)) { + if (IS->AreRelocsRela) + IS->relocateNonAlloc(Buf, IS->relas()); + else + IS->relocateNonAlloc(Buf, IS->rels()); return; } const unsigned Bits = sizeof(uintX_t) * 8; - for (const Relocation<ELFT> &Rel : Relocations) { - uintX_t Offset = Rel.InputSec->getOffset(Rel.Offset); + for (const Relocation &Rel : Relocations) { + uintX_t Offset = getOffset(Rel.Offset); uint8_t *BufLoc = Buf + Offset; uint32_t Type = Rel.Type; uintX_t A = Rel.Addend; - uintX_t AddrLoc = OutSec->getVA() + Offset; + uintX_t AddrLoc = OutSec->Addr + Offset; RelExpr Expr = Rel.Expr; - uint64_t SymVA = - SignExtend64<Bits>(getSymVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr)); + uint64_t TargetVA = SignExtend64<Bits>( + getRelocTargetVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr)); switch (Expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - Target->relaxGot(BufLoc, SymVA); + Target->relaxGot(BufLoc, TargetVA); break; case R_RELAX_TLS_IE_TO_LE: - Target->relaxTlsIeToLe(BufLoc, Type, SymVA); + Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_LD_TO_LE: - Target->relaxTlsLdToLe(BufLoc, Type, SymVA); + Target->relaxTlsLdToLe(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: - Target->relaxTlsGdToLe(BufLoc, Type, SymVA); + Target->relaxTlsGdToLe(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE_END: - Target->relaxTlsGdToIe(BufLoc, Type, SymVA); + Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; case R_PPC_PLT_OPD: // Patch a nop (0x60000000) to a ld. 
if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000) write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1) - // fallthrough + // fallthrough default: - Target->relocateOne(BufLoc, Type, SymVA); + Target->relocateOne(BufLoc, Type, TargetVA); break; } } } template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) { - if (this->Header->sh_type == SHT_NOBITS) + if (this->Type == SHT_NOBITS) + return; + + if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) { + S->writeTo(Buf + OutSecOff); return; - ELFFile<ELFT> &EObj = this->File->getObj(); + } // If -r is given, then an InputSection may be a relocation section. - if (this->Header->sh_type == SHT_RELA) { - copyRelocations(Buf + OutSecOff, EObj.relas(this->Header)); + if (this->Type == SHT_RELA) { + copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rela>()); return; } - if (this->Header->sh_type == SHT_REL) { - copyRelocations(Buf + OutSecOff, EObj.rels(this->Header)); + if (this->Type == SHT_REL) { + copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rel>()); return; } // Copy section contents from source object file to output file. - ArrayRef<uint8_t> Data = this->getSectionData(); + ArrayRef<uint8_t> Data = this->Data; memcpy(Buf + OutSecOff, Data.data(), Data.size()); // Iterate over all relocation sections that apply to this section. 
@@ -431,15 +617,9 @@ void InputSection<ELFT>::replace(InputSection<ELFT> *Other) { } template <class ELFT> -SplitInputSection<ELFT>::SplitInputSection( - elf::ObjectFile<ELFT> *File, const Elf_Shdr *Header, - typename InputSectionBase<ELFT>::Kind SectionKind) - : InputSectionBase<ELFT>(File, Header, SectionKind) {} - -template <class ELFT> EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header) - : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::EHFrame) { + const Elf_Shdr *Header, StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::EHFrame) { // Mark .eh_frame sections as live by default because there are // usually no relocations that point to .eh_frames. Otherwise, // the garbage collector would drop all .eh_frame sections. @@ -447,18 +627,54 @@ EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F, } template <class ELFT> -bool EhInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::EHFrame; +bool EhInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::EHFrame; +} + +// Returns the index of the first relocation that points to a region between +// Begin and Begin+Size. +template <class IntTy, class RelTy> +static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels, + unsigned &RelocI) { + // Start search from RelocI for fast access. That works because the + // relocations are sorted in .eh_frame. + for (unsigned N = Rels.size(); RelocI < N; ++RelocI) { + const RelTy &Rel = Rels[RelocI]; + if (Rel.r_offset < Begin) + continue; + + if (Rel.r_offset < Begin + Size) + return RelocI; + return -1; + } + return -1; } // .eh_frame is a sequence of CIE or FDE records. // This function splits an input section into records and returns them. +template <class ELFT> void EhInputSection<ELFT>::split() { + // Early exit if already split. 
+ if (!this->Pieces.empty()) + return; + + if (this->NumRelocations) { + if (this->AreRelocsRela) + split(this->relas()); + else + split(this->rels()); + return; + } + split(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr)); +} + template <class ELFT> -void EhInputSection<ELFT>::split() { - ArrayRef<uint8_t> Data = this->getSectionData(); +template <class RelTy> +void EhInputSection<ELFT>::split(ArrayRef<RelTy> Rels) { + ArrayRef<uint8_t> Data = this->Data; + unsigned RelI = 0; for (size_t Off = 0, End = Data.size(); Off != End;) { - size_t Size = readEhRecordSize<ELFT>(Data.slice(Off)); - this->Pieces.emplace_back(Off, Data.slice(Off, Size)); + size_t Size = readEhRecordSize<ELFT>(this, Off); + this->Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI)); // The empty record is the end marker. if (Size == 4) break; @@ -466,21 +682,6 @@ void EhInputSection<ELFT>::split() { } } -template <class ELFT> -typename ELFT::uint EhInputSection<ELFT>::getOffset(uintX_t Offset) const { - // The file crtbeginT.o has relocations pointing to the start of an empty - // .eh_frame that is known to be the first in the link. It does that to - // identify the start of the output .eh_frame. Handle this special case. - if (this->getSectionHdr()->sh_size == 0) - return Offset; - const SectionPiece *Piece = this->getSectionPiece(Offset); - if (Piece->OutputOff == size_t(-1)) - return -1; // Not in the output - - uintX_t Addend = Offset - Piece->InputOff; - return Piece->OutputOff + Addend; -} - static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { // Optimize the common case. StringRef S((const char *)A.data(), A.size()); @@ -497,75 +698,96 @@ static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. 
-static std::vector<SectionPiece> splitStrings(ArrayRef<uint8_t> Data, - size_t EntSize) { - std::vector<SectionPiece> V; +template <class ELFT> +void MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { size_t Off = 0; + bool IsAlloc = this->Flags & SHF_ALLOC; while (!Data.empty()) { size_t End = findNull(Data, EntSize); if (End == StringRef::npos) - fatal("string is not null terminated"); + fatal(toString(this) + ": string is not null terminated"); size_t Size = End + EntSize; - V.emplace_back(Off, Data.slice(0, Size)); + Pieces.emplace_back(Off, !IsAlloc); + Hashes.push_back(hash_value(toStringRef(Data.slice(0, Size)))); Data = Data.slice(Size); Off += Size; } - return V; } // Split non-SHF_STRINGS section. Such section is a sequence of // fixed size records. -static std::vector<SectionPiece> splitNonStrings(ArrayRef<uint8_t> Data, - size_t EntSize) { - std::vector<SectionPiece> V; +template <class ELFT> +void MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { size_t Size = Data.size(); assert((Size % EntSize) == 0); - for (unsigned I = 0, N = Size; I != N; I += EntSize) - V.emplace_back(I, Data.slice(I, EntSize)); - return V; + bool IsAlloc = this->Flags & SHF_ALLOC; + for (unsigned I = 0, N = Size; I != N; I += EntSize) { + Hashes.push_back(hash_value(toStringRef(Data.slice(I, EntSize)))); + Pieces.emplace_back(I, !IsAlloc); + } } template <class ELFT> MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header) - : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {} + const Elf_Shdr *Header, + StringRef Name) + : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::Merge) {} +// This function is called after we obtain a complete list of input sections +// that need to be linked. This is responsible to split section contents +// into small chunks for further processing. +// +// Note that this function is called from parallel_for_each. 
This must be +// thread-safe (i.e. no memory allocation from the pools). template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() { - ArrayRef<uint8_t> Data = this->getSectionData(); - uintX_t EntSize = this->Header->sh_entsize; - if (this->Header->sh_flags & SHF_STRINGS) - this->Pieces = splitStrings(Data, EntSize); + ArrayRef<uint8_t> Data = this->Data; + uintX_t EntSize = this->Entsize; + if (this->Flags & SHF_STRINGS) + splitStrings(Data, EntSize); else - this->Pieces = splitNonStrings(Data, EntSize); + splitNonStrings(Data, EntSize); - if (Config->GcSections) + if (Config->GcSections && (this->Flags & SHF_ALLOC)) for (uintX_t Off : LiveOffsets) this->getSectionPiece(Off)->Live = true; } template <class ELFT> -bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::Merge; +bool MergeInputSection<ELFT>::classof(const InputSectionData *S) { + return S->kind() == InputSectionBase<ELFT>::Merge; } // Do binary search to get a section piece at a given input offset. template <class ELFT> -SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) { - auto *This = static_cast<const SplitInputSection<ELFT> *>(this); +SectionPiece *MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) { + auto *This = static_cast<const MergeInputSection<ELFT> *>(this); return const_cast<SectionPiece *>(This->getSectionPiece(Offset)); } +template <class It, class T, class Compare> +static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) { + size_t Size = std::distance(First, Last); + assert(Size != 0); + while (Size != 1) { + size_t H = Size / 2; + const It MI = First + H; + Size -= H; + First = Comp(Value, *MI) ? First : First + H; + } + return Comp(Value, *First) ? 
First : First + 1; +} + template <class ELFT> const SectionPiece * -SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { - ArrayRef<uint8_t> D = this->getSectionData(); - StringRef Data((const char *)D.data(), D.size()); - uintX_t Size = Data.size(); +MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { + uintX_t Size = this->Data.size(); if (Offset >= Size) - fatal("entry is past the end of the section"); + fatal(toString(this) + ": entry is past the end of the section"); // Find the element this offset points to. - auto I = std::upper_bound( + auto I = fastUpperBound( Pieces.begin(), Pieces.end(), Offset, [](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; }); --I; @@ -577,84 +799,31 @@ SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { // it is not just an addition to a base output offset. template <class ELFT> typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const { + // Initialize OffsetMap lazily. + std::call_once(InitOffsetMap, [&] { + OffsetMap.reserve(Pieces.size()); + for (const SectionPiece &Piece : Pieces) + OffsetMap[Piece.InputOff] = Piece.OutputOff; + }); + + // Find a string starting at a given offset. auto It = OffsetMap.find(Offset); if (It != OffsetMap.end()) return It->second; + if (!this->Live) + return 0; + // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. const SectionPiece &Piece = *this->getSectionPiece(Offset); - assert(Piece.Live); + if (!Piece.Live) + return 0; + uintX_t Addend = Offset - Piece.InputOff; return Piece.OutputOff + Addend; } -// Create a map from input offsets to output offsets for all section pieces. -// It is called after finalize(). 
-template <class ELFT> void MergeInputSection<ELFT>::finalizePieces() { - OffsetMap.grow(this->Pieces.size()); - for (SectionPiece &Piece : this->Pieces) { - if (!Piece.Live) - continue; - if (Piece.OutputOff == size_t(-1)) { - // Offsets of tail-merged strings are computed lazily. - auto *OutSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec); - ArrayRef<uint8_t> D = Piece.data(); - StringRef S((const char *)D.data(), D.size()); - Piece.OutputOff = OutSec->getOffset(S); - } - OffsetMap[Piece.InputOff] = Piece.OutputOff; - } -} - -template <class ELFT> -MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Hdr) - : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsReginfo) { - // Initialize this->Reginfo. - ArrayRef<uint8_t> D = this->getSectionData(); - if (D.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) { - error("invalid size of .reginfo section"); - return; - } - Reginfo = reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(D.data()); -} - -template <class ELFT> -bool MipsReginfoInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::MipsReginfo; -} - -template <class ELFT> -MipsOptionsInputSection<ELFT>::MipsOptionsInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Hdr) - : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsOptions) { - // Find ODK_REGINFO option in the section's content. 
- ArrayRef<uint8_t> D = this->getSectionData(); - while (!D.empty()) { - if (D.size() < sizeof(Elf_Mips_Options<ELFT>)) { - error("invalid size of .MIPS.options section"); - break; - } - auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(D.data()); - if (O->kind == ODK_REGINFO) { - Reginfo = &O->getRegInfo(); - break; - } - D = D.slice(O->size); - } -} - -template <class ELFT> -bool MipsOptionsInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) { - return S->SectionKind == InputSectionBase<ELFT>::MipsOptions; -} - -template bool elf::isDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *); -template bool elf::isDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *); -template bool elf::isDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *); -template bool elf::isDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *); - template class elf::InputSectionBase<ELF32LE>; template class elf::InputSectionBase<ELF32BE>; template class elf::InputSectionBase<ELF64LE>; @@ -665,11 +834,6 @@ template class elf::InputSection<ELF32BE>; template class elf::InputSection<ELF64LE>; template class elf::InputSection<ELF64BE>; -template class elf::SplitInputSection<ELF32LE>; -template class elf::SplitInputSection<ELF32BE>; -template class elf::SplitInputSection<ELF64LE>; -template class elf::SplitInputSection<ELF64BE>; - template class elf::EhInputSection<ELF32LE>; template class elf::EhInputSection<ELF32BE>; template class elf::EhInputSection<ELF64LE>; @@ -680,12 +844,7 @@ template class elf::MergeInputSection<ELF32BE>; template class elf::MergeInputSection<ELF64LE>; template class elf::MergeInputSection<ELF64BE>; -template class elf::MipsReginfoInputSection<ELF32LE>; -template class elf::MipsReginfoInputSection<ELF32BE>; -template class elf::MipsReginfoInputSection<ELF64LE>; -template class elf::MipsReginfoInputSection<ELF64BE>; - -template class elf::MipsOptionsInputSection<ELF32LE>; -template class elf::MipsOptionsInputSection<ELF32BE>; -template class elf::MipsOptionsInputSection<ELF64LE>; 
-template class elf::MipsOptionsInputSection<ELF64BE>; +template std::string elf::toString(const InputSectionBase<ELF32LE> *); +template std::string elf::toString(const InputSectionBase<ELF32BE> *); +template std::string elf::toString(const InputSectionBase<ELF64LE> *); +template std::string elf::toString(const InputSectionBase<ELF64BE> *); diff --git a/contrib/llvm/tools/lld/ELF/InputSection.h b/contrib/llvm/tools/lld/ELF/InputSection.h index 61a89c540c5d..adbc1e1e3829 100644 --- a/contrib/llvm/tools/lld/ELF/InputSection.h +++ b/contrib/llvm/tools/lld/ELF/InputSection.h @@ -14,25 +14,64 @@ #include "Relocations.h" #include "Thunks.h" #include "lld/Core/LLVM.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Object/ELF.h" +#include <mutex> namespace lld { namespace elf { -template <class ELFT> bool isDiscarded(InputSectionBase<ELFT> *S); - +class DefinedCommon; class SymbolBody; +struct SectionPiece; -template <class ELFT> class ICF; template <class ELFT> class DefinedRegular; template <class ELFT> class ObjectFile; template <class ELFT> class OutputSection; -template <class ELFT> class OutputSectionBase; +class OutputSectionBase; + +// We need non-template input section class to store symbol layout +// in linker script parser structures, where we do not have ELFT +// template parameter. For each scripted output section symbol we +// store pointer to preceding InputSectionData object or nullptr, +// if symbol should be placed at the very beginning of the output +// section +class InputSectionData { +public: + enum Kind { Regular, EHFrame, Merge, Synthetic, }; + + // The garbage collector sets sections' Live bits. + // If GC is disabled, all sections are considered live by default. 
+ InputSectionData(Kind SectionKind, StringRef Name, ArrayRef<uint8_t> Data, + bool Live) + : SectionKind(SectionKind), Live(Live), Assigned(false), Name(Name), + Data(Data) {} + +private: + unsigned SectionKind : 3; + +public: + Kind kind() const { return (Kind)SectionKind; } + + unsigned Live : 1; // for garbage collection + unsigned Assigned : 1; // for linker script + uint32_t Alignment; + StringRef Name; + ArrayRef<uint8_t> Data; + + template <typename T> llvm::ArrayRef<T> getDataAs() const { + size_t S = Data.size(); + assert(S % sizeof(T) == 0); + return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T)); + } + + std::vector<Relocation> Relocations; +}; // This corresponds to a section of an input file. -template <class ELFT> class InputSectionBase { +template <class ELFT> class InputSectionBase : public InputSectionData { protected: typedef typename ELFT::Chdr Elf_Chdr; typedef typename ELFT::Rel Elf_Rel; @@ -40,27 +79,46 @@ protected: typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::uint uintX_t; - const Elf_Shdr *Header; // The file this section is from. ObjectFile<ELFT> *File; - // If a section is compressed, this vector has uncompressed section data. - SmallVector<char, 0> Uncompressed; - public: - enum Kind { Regular, EHFrame, Merge, MipsReginfo, MipsOptions }; - Kind SectionKind; - - InputSectionBase() : Repl(this) {} + // These corresponds to the fields in Elf_Shdr. 
+ uintX_t Flags; + uintX_t Offset = 0; + uintX_t Entsize; + uint32_t Type; + uint32_t Link; + uint32_t Info; + + InputSectionBase() + : InputSectionData(Regular, "", ArrayRef<uint8_t>(), false), Repl(this) { + NumRelocations = 0; + AreRelocsRela = false; + } InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + StringRef Name, Kind SectionKind); + InputSectionBase(ObjectFile<ELFT> *File, uintX_t Flags, uint32_t Type, + uintX_t Entsize, uint32_t Link, uint32_t Info, + uintX_t Addralign, ArrayRef<uint8_t> Data, StringRef Name, Kind SectionKind); - OutputSectionBase<ELFT> *OutSec = nullptr; - uint32_t Alignment; - - // Used for garbage collection. - bool Live; + OutputSectionBase *OutSec = nullptr; + + // Relocations that refer to this section. + const Elf_Rel *FirstRelocation = nullptr; + unsigned NumRelocations : 31; + unsigned AreRelocsRela : 1; + ArrayRef<Elf_Rel> rels() const { + assert(!AreRelocsRela); + return llvm::makeArrayRef(FirstRelocation, NumRelocations); + } + ArrayRef<Elf_Rela> relas() const { + assert(AreRelocsRela); + return llvm::makeArrayRef(static_cast<const Elf_Rela *>(FirstRelocation), + NumRelocations); + } // This pointer points to the "real" instance of this instance. // Usually Repl == this. However, if ICF merges two sections, @@ -72,140 +130,163 @@ public: // Returns the size of this section (even if this is a common or BSS.) size_t getSize() const; - static InputSectionBase<ELFT> Discarded; - - StringRef getSectionName() const; - const Elf_Shdr *getSectionHdr() const { return Header; } ObjectFile<ELFT> *getFile() const { return File; } + llvm::object::ELFFile<ELFT> getObj() const { return File->getObj(); } uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const; - + InputSectionBase *getLinkOrderDep() const; // Translate an offset in the input section to an offset in the output // section. uintX_t getOffset(uintX_t Offset) const; - ArrayRef<uint8_t> getSectionData() const; - + // ELF supports ZLIB-compressed section. 
+ // Returns true if the section is compressed. + bool isCompressed() const; void uncompress(); + // Returns a source location string. Used to construct an error message. + std::string getLocation(uintX_t Offset); + void relocate(uint8_t *Buf, uint8_t *BufEnd); - std::vector<Relocation<ELFT>> Relocations; - bool Compressed; -}; +private: + std::pair<ArrayRef<uint8_t>, uint64_t> + getElfCompressedData(ArrayRef<uint8_t> Data); -template <class ELFT> InputSectionBase<ELFT> InputSectionBase<ELFT>::Discarded; + std::pair<ArrayRef<uint8_t>, uint64_t> + getRawCompressedData(ArrayRef<uint8_t> Data); +}; // SectionPiece represents a piece of splittable section contents. +// We allocate a lot of these and binary search on them. This means that they +// have to be as compact as possible, which is why we don't store the size (can +// be found by looking at the next one) and put the hash in a side table. struct SectionPiece { - SectionPiece(size_t Off, ArrayRef<uint8_t> Data) - : InputOff(Off), Data((const uint8_t *)Data.data()), Size(Data.size()), - Live(!Config->GcSections) {} - - ArrayRef<uint8_t> data() { return {Data, Size}; } - size_t size() const { return Size; } + SectionPiece(size_t Off, bool Live = false) + : InputOff(Off), OutputOff(-1), Live(Live || !Config->GcSections) {} size_t InputOff; - size_t OutputOff = -1; - -private: - // We use bitfields because SplitInputSection is accessed by - // std::upper_bound very often. - // We want to save bits to make it cache friendly. - const uint8_t *Data; - uint32_t Size : 31; - -public: - uint32_t Live : 1; -}; - -// Usually sections are copied to the output as atomic chunks of data, -// but some special types of sections are split into small pieces of data -// and each piece is copied to a different place in the output. -// This class represents such special sections. 
-template <class ELFT> class SplitInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::uint uintX_t; - -public: - SplitInputSection(ObjectFile<ELFT> *File, const Elf_Shdr *Header, - typename InputSectionBase<ELFT>::Kind SectionKind); - - // Splittable sections are handled as a sequence of data - // rather than a single large blob of data. - std::vector<SectionPiece> Pieces; - - // Returns the SectionPiece at a given input section offset. - SectionPiece *getSectionPiece(uintX_t Offset); - const SectionPiece *getSectionPiece(uintX_t Offset) const; + ssize_t OutputOff : 8 * sizeof(ssize_t) - 1; + size_t Live : 1; }; +static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t), + "SectionPiece is too big"); // This corresponds to a SHF_MERGE section of an input file. -template <class ELFT> class MergeInputSection : public SplitInputSection<ELFT> { +template <class ELFT> class MergeInputSection : public InputSectionBase<ELFT> { typedef typename ELFT::uint uintX_t; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Shdr Elf_Shdr; public: - MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); - static bool classof(const InputSectionBase<ELFT> *S); + MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, + StringRef Name); + static bool classof(const InputSectionData *S); void splitIntoPieces(); // Mark the piece at a given offset live. Used by GC. - void markLiveAt(uintX_t Offset) { LiveOffsets.insert(Offset); } + void markLiveAt(uintX_t Offset) { + assert(this->Flags & llvm::ELF::SHF_ALLOC); + LiveOffsets.insert(Offset); + } // Translate an offset in the input section to an offset // in the output section. uintX_t getOffset(uintX_t Offset) const; - void finalizePieces(); + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector<SectionPiece> Pieces; + + // Returns I'th piece's data. 
This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getData(size_t I) const { + size_t Begin = Pieces[I].InputOff; + size_t End; + if (Pieces.size() - 1 == I) + End = this->Data.size(); + else + End = Pieces[I + 1].InputOff; + + StringRef S = {(const char *)(this->Data.data() + Begin), End - Begin}; + return {S, Hashes[I]}; + } + + // Returns the SectionPiece at a given input section offset. + SectionPiece *getSectionPiece(uintX_t Offset); + const SectionPiece *getSectionPiece(uintX_t Offset) const; private: - llvm::DenseMap<uintX_t, uintX_t> OffsetMap; + void splitStrings(ArrayRef<uint8_t> A, size_t Size); + void splitNonStrings(ArrayRef<uint8_t> A, size_t Size); + + std::vector<uint32_t> Hashes; + + mutable llvm::DenseMap<uintX_t, uintX_t> OffsetMap; + mutable std::once_flag InitOffsetMap; + llvm::DenseSet<uintX_t> LiveOffsets; }; +struct EhSectionPiece : public SectionPiece { + EhSectionPiece(size_t Off, InputSectionData *ID, uint32_t Size, + unsigned FirstRelocation) + : SectionPiece(Off, false), ID(ID), Size(Size), + FirstRelocation(FirstRelocation) {} + InputSectionData *ID; + uint32_t Size; + uint32_t size() const { return Size; } + + ArrayRef<uint8_t> data() { return {ID->Data.data() + this->InputOff, Size}; } + unsigned FirstRelocation; +}; + // This corresponds to a .eh_frame section of an input file. 
-template <class ELFT> class EhInputSection : public SplitInputSection<ELFT> { +template <class ELFT> class EhInputSection : public InputSectionBase<ELFT> { public: typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::uint uintX_t; - EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); - static bool classof(const InputSectionBase<ELFT> *S); + EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); + static bool classof(const InputSectionData *S); void split(); + template <class RelTy> void split(ArrayRef<RelTy> Rels); - // Translate an offset in the input section to an offset in the output - // section. - uintX_t getOffset(uintX_t Offset) const; - - // Relocation section that refer to this one. - const Elf_Shdr *RelocSection = nullptr; + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector<EhSectionPiece> Pieces; }; // This corresponds to a non SHF_MERGE section of an input file. template <class ELFT> class InputSection : public InputSectionBase<ELFT> { - friend ICF<ELFT>; typedef InputSectionBase<ELFT> Base; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::Rel Elf_Rel; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::uint uintX_t; + typedef InputSectionData::Kind Kind; public: - InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header); + InputSection(); + InputSection(uintX_t Flags, uint32_t Type, uintX_t Addralign, + ArrayRef<uint8_t> Data, StringRef Name, + Kind K = InputSectionData::Regular); + InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); + + static InputSection<ELFT> Discarded; // Write this section to a mmap'ed file, assuming Buf is pointing to // beginning of the output section. void writeTo(uint8_t *Buf); - // Relocation sections that refer to this one. 
- llvm::TinyPtrVector<const Elf_Shdr *> RelocSections; - // The offset from beginning of the output sections this section was assigned // to. The writer sets a value. uint64_t OutSecOff = 0; - static bool classof(const InputSectionBase<ELFT> *S); + // InputSection that is dependent on us (reverse dependency for GC) + InputSectionBase<ELFT> *DependentSection = nullptr; + + static bool classof(const InputSectionData *S); InputSectionBase<ELFT> *getRelocatedSection(); @@ -223,46 +304,22 @@ public: template <class RelTy> void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); -private: - template <class RelTy> - void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); + // Used by ICF. + uint32_t Class[2] = {0, 0}; // Called by ICF to merge two input sections. void replace(InputSection<ELFT> *Other); - // Used by ICF. - uint64_t GroupId = 0; +private: + template <class RelTy> + void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks; }; -// MIPS .reginfo section provides information on the registers used by the code -// in the object file. Linker should collect this information and write a single -// .reginfo section in the output file. The output section contains a union of -// used registers masks taken from input .reginfo sections and final value -// of the `_gp` symbol. 
For details: Chapter 4 / "Register Information" at -// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -template <class ELFT> -class MipsReginfoInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::Shdr Elf_Shdr; - -public: - MipsReginfoInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr); - static bool classof(const InputSectionBase<ELFT> *S); - - const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr; -}; +template <class ELFT> InputSection<ELFT> InputSection<ELFT>::Discarded; -template <class ELFT> -class MipsOptionsInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::Shdr Elf_Shdr; - -public: - MipsOptionsInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr); - static bool classof(const InputSectionBase<ELFT> *S); - - const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr; -}; +template <class ELFT> std::string toString(const InputSectionBase<ELFT> *); } // namespace elf } // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp index 0e8006a3b32a..a3d6a141a202 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.cpp +++ b/contrib/llvm/tools/lld/ELF/LTO.cpp @@ -9,31 +9,30 @@ #include "LTO.h" #include "Config.h" -#include "Driver.h" #include "Error.h" #include "InputFiles.h" #include "Symbols.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/LoopPassManager.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/CommandFlags.h" -#include "llvm/CodeGen/ParallelCG.h" -#include "llvm/IR/AutoUpgrade.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/Verifier.h" -#include "llvm/LTO/legacy/UpdateCompilerUsed.h" -#include 
"llvm/Linker/IRMover.h" -#include "llvm/Passes/PassBuilder.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/LTO/Config.h" +#include "llvm/LTO/LTO.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstddef> +#include <memory> +#include <string> +#include <system_error> +#include <vector> using namespace llvm; using namespace llvm::object; @@ -51,275 +50,110 @@ static void saveBuffer(StringRef Buffer, const Twine &Path) { OS << Buffer; } -// This is for use when debugging LTO. -static void saveBCFile(Module &M, const Twine &Path) { - std::error_code EC; - raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None); - if (EC) - error(EC, "cannot create " + Path); - WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true); +static void diagnosticHandler(const DiagnosticInfo &DI) { + SmallString<128> ErrStorage; + raw_svector_ostream OS(ErrStorage); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + warn(ErrStorage); } -static void runNewCustomLtoPasses(Module &M, TargetMachine &TM) { - PassBuilder PB(&TM); - - AAManager AA; - - // Parse a custom AA pipeline if asked to. - if (!PB.parseAAPipeline(AA, Config->LtoAAPipeline)) { - error("Unable to parse AA pipeline description: " + Config->LtoAAPipeline); - return; - } - - LoopAnalysisManager LAM; - FunctionAnalysisManager FAM; - CGSCCAnalysisManager CGAM; - ModuleAnalysisManager MAM; - - // Register the AA manager first so that our version is the one used. 
- FAM.registerPass([&] { return std::move(AA); }); - - // Register all the basic analyses with the managers. - PB.registerModuleAnalyses(MAM); - PB.registerCGSCCAnalyses(CGAM); - PB.registerFunctionAnalyses(FAM); - PB.registerLoopAnalyses(LAM); - PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); - - ModulePassManager MPM; - if (!Config->DisableVerify) - MPM.addPass(VerifierPass()); +static void checkError(Error E) { + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) -> Error { + error(EIB.message()); + return Error::success(); + }); +} - // Now, add all the passes we've been requested to. - if (!PB.parsePassPipeline(MPM, Config->LtoNewPmPasses)) { - error("unable to parse pass pipeline description: " + - Config->LtoNewPmPasses); - return; - } +static std::unique_ptr<lto::LTO> createLTO() { + lto::Config Conf; - if (!Config->DisableVerify) - MPM.addPass(VerifierPass()); - MPM.run(M, MAM); -} + // LLD supports the new relocations. + Conf.Options = InitTargetOptionsFromCodeGenFlags(); + Conf.Options.RelaxELFRelocations = true; -static void runOldLtoPasses(Module &M, TargetMachine &TM) { - // Note that the gold plugin has a similar piece of code, so - // it is probably better to move this code to a common place. - legacy::PassManager LtoPasses; - LtoPasses.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); - PassManagerBuilder PMB; - PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM.getTargetTriple())); - PMB.Inliner = createFunctionInliningPass(); - PMB.VerifyInput = PMB.VerifyOutput = !Config->DisableVerify; - PMB.LoopVectorize = true; - PMB.SLPVectorize = true; - PMB.OptLevel = Config->LtoO; - PMB.populateLTOPassManager(LtoPasses); - LtoPasses.run(M); -} + Conf.RelocModel = Config->Pic ? 
Reloc::PIC_ : Reloc::Static; + Conf.DisableVerify = Config->DisableVerify; + Conf.DiagHandler = diagnosticHandler; + Conf.OptLevel = Config->LTOO; -static void runLTOPasses(Module &M, TargetMachine &TM) { - if (!Config->LtoNewPmPasses.empty()) { - // The user explicitly asked for a set of passes to be run. - // This needs the new PM to work as there's no clean way to - // pass a set of passes to run in the legacy PM. - runNewCustomLtoPasses(M, TM); - if (HasError) - return; - } else { - // Run the 'default' set of LTO passes. This code still uses - // the legacy PM as the new one is not the default. - runOldLtoPasses(M, TM); - } + // Set up a custom pipeline if we've been asked to. + Conf.OptPipeline = Config->LTONewPmPasses; + Conf.AAPipeline = Config->LTOAAPipeline; if (Config->SaveTemps) - saveBCFile(M, Config->OutputFile + ".lto.opt.bc"); + checkError(Conf.addSaveTemps(std::string(Config->OutputFile) + ".", + /*UseInputModulePath*/ true)); + + lto::ThinBackend Backend; + if (Config->ThinLTOJobs != -1u) + Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs); + return llvm::make_unique<lto::LTO>(std::move(Conf), Backend, + Config->LTOPartitions); } -static bool shouldInternalize(const SmallPtrSet<GlobalValue *, 8> &Used, - Symbol *S, GlobalValue *GV) { - if (S->IsUsedInRegularObj || Used.count(GV)) - return false; - return !S->includeInDynsym(); -} +BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} -BitcodeCompiler::BitcodeCompiler() - : Combined(new Module("ld-temp.o", Driver->Context)) {} +BitcodeCompiler::~BitcodeCompiler() = default; static void undefine(Symbol *S) { - replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type, - nullptr); -} - -static void handleUndefinedAsmRefs(const BasicSymbolRef &Sym, GlobalValue *GV, - StringSet<> &AsmUndefinedRefs) { - // GV associated => not an assembly symbol, bail out. - if (GV) - return; - - // This is an undefined reference to a symbol in asm. 
We put that in - // compiler.used, so that we can preserve it from being dropped from - // the output, without necessarily preventing its internalization. - SmallString<64> Name; - raw_svector_ostream OS(Name); - Sym.printName(OS); - AsmUndefinedRefs.insert(Name.str()); + replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); } void BitcodeCompiler::add(BitcodeFile &F) { - std::unique_ptr<IRObjectFile> Obj = std::move(F.Obj); - std::vector<GlobalValue *> Keep; - unsigned BodyIndex = 0; - ArrayRef<Symbol *> Syms = F.getSymbols(); - - Module &M = Obj->getModule(); - if (M.getDataLayoutStr().empty()) - fatal("invalid bitcode file: " + F.getName() + " has no datalayout"); - - // Discard non-compatible debug infos if necessary. - M.materializeMetadata(); - UpgradeDebugInfo(M); - - // If a symbol appears in @llvm.used, the linker is required - // to treat the symbol as there is a reference to the symbol - // that it cannot see. Therefore, we can't internalize. - SmallPtrSet<GlobalValue *, 8> Used; - collectUsedGlobalVariables(M, Used, /* CompilerUsed */ false); - - for (const BasicSymbolRef &Sym : Obj->symbols()) { - uint32_t Flags = Sym.getFlags(); - GlobalValue *GV = Obj->getSymbolGV(Sym.getRawDataRefImpl()); - if (GV && GV->hasAppendingLinkage()) - Keep.push_back(GV); - if (BitcodeFile::shouldSkip(Flags)) - continue; - Symbol *S = Syms[BodyIndex++]; - if (Flags & BasicSymbolRef::SF_Undefined) { - handleUndefinedAsmRefs(Sym, GV, AsmUndefinedRefs); - continue; - } - auto *B = dyn_cast<DefinedBitcode>(S->body()); - if (!B || B->file() != &F) - continue; - - // We collect the set of symbols we want to internalize here - // and change the linkage after the IRMover executed, i.e. after - // we imported the symbols and satisfied undefined references - // to it. We can't just change linkage here because otherwise - // the IRMover will just rename the symbol. 
- if (GV && shouldInternalize(Used, S, GV)) - InternalizedSyms.insert(GV->getName()); - - // At this point we know that either the combined LTO object will provide a - // definition of a symbol, or we will internalize it. In either case, we - // need to undefine the symbol. In the former case, the real definition - // needs to be able to replace the original definition without conflicting. - // In the latter case, we need to allow the combined LTO object to provide a - // definition with the same name, for example when doing parallel codegen. - undefine(S); - - if (!GV) - // Module asm symbol. - continue; - - switch (GV->getLinkage()) { - default: - break; - case GlobalValue::LinkOnceAnyLinkage: - GV->setLinkage(GlobalValue::WeakAnyLinkage); - break; - case GlobalValue::LinkOnceODRLinkage: - GV->setLinkage(GlobalValue::WeakODRLinkage); - break; - } - - Keep.push_back(GV); - } - - IRMover Mover(*Combined); - if (Error E = Mover.move(Obj->takeModule(), Keep, - [](GlobalValue &, IRMover::ValueAdder) {})) { - handleAllErrors(std::move(E), [&](const ErrorInfoBase &EIB) { - fatal("failed to link module " + F.getName() + ": " + EIB.message()); - }); + lto::InputFile &Obj = *F.Obj; + unsigned SymNum = 0; + std::vector<Symbol *> Syms = F.getSymbols(); + std::vector<lto::SymbolResolution> Resols(Syms.size()); + + // Provide a resolution to the LTO API for each symbol. + for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) { + Symbol *Sym = Syms[SymNum]; + lto::SymbolResolution &R = Resols[SymNum]; + ++SymNum; + SymbolBody *B = Sym->body(); + + // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile + // reports two symbols for module ASM defined. Without this check, lld + // flags an undefined in IR with a definition in ASM as prevailing. + // Once IRObjectFile is fixed to report only one symbol this hack can + // be removed. 
+ R.Prevailing = + !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) && + B->File == &F; + + R.VisibleToRegularObj = + Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); + if (R.Prevailing) + undefine(Sym); } -} - -static void internalize(GlobalValue &GV) { - assert(!GV.hasLocalLinkage() && - "Trying to internalize a symbol with local linkage!"); - GV.setLinkage(GlobalValue::InternalLinkage); -} - -std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen( - const std::function<std::unique_ptr<TargetMachine>()> &TMFactory) { - unsigned NumThreads = Config->LtoJobs; - OwningData.resize(NumThreads); - - std::list<raw_svector_ostream> OSs; - std::vector<raw_pwrite_stream *> OSPtrs; - for (SmallString<0> &Obj : OwningData) { - OSs.emplace_back(Obj); - OSPtrs.push_back(&OSs.back()); - } - - splitCodeGen(std::move(Combined), OSPtrs, {}, TMFactory); - - std::vector<std::unique_ptr<InputFile>> ObjFiles; - for (SmallString<0> &Obj : OwningData) - ObjFiles.push_back(createObjectFile( - MemoryBufferRef(Obj, "LLD-INTERNAL-combined-lto-object"))); - - // If -save-temps is given, we need to save temporary objects to files. - // This is for debugging. - if (Config->SaveTemps) { - if (NumThreads == 1) { - saveBuffer(OwningData[0], Config->OutputFile + ".lto.o"); - } else { - for (unsigned I = 0; I < NumThreads; ++I) - saveBuffer(OwningData[I], Config->OutputFile + Twine(I) + ".lto.o"); - } - } - - return ObjFiles; + checkError(LTOObj->add(std::move(F.Obj), Resols)); } // Merge all the bitcode files we have seen, codegen the result -// and return the resulting ObjectFile. -std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() { - for (const auto &Name : InternalizedSyms) { - GlobalValue *GV = Combined->getNamedValue(Name.first()); - assert(GV); - internalize(*GV); +// and return the resulting ObjectFile(s). 
+std::vector<InputFile *> BitcodeCompiler::compile() { + std::vector<InputFile *> Ret; + unsigned MaxTasks = LTOObj->getMaxTasks(); + Buff.resize(MaxTasks); + + checkError(LTOObj->run([&](size_t Task) { + return llvm::make_unique<lto::NativeObjectStream>( + llvm::make_unique<raw_svector_ostream>(Buff[Task])); + })); + + for (unsigned I = 0; I != MaxTasks; ++I) { + if (Buff[I].empty()) + continue; + if (Config->SaveTemps) { + if (MaxTasks == 1) + saveBuffer(Buff[I], Config->OutputFile + ".lto.o"); + else + saveBuffer(Buff[I], Config->OutputFile + Twine(I) + ".lto.o"); + } + InputFile *Obj = createObjectFile(MemoryBufferRef(Buff[I], "lto.tmp")); + Ret.push_back(Obj); } - - std::string TheTriple = Combined->getTargetTriple(); - std::string Msg; - const Target *T = TargetRegistry::lookupTarget(TheTriple, Msg); - if (!T) - fatal("target not found: " + Msg); - - // LLD supports the new relocations. - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - Options.RelaxELFRelocations = true; - - auto CreateTargetMachine = [&]() { - return std::unique_ptr<TargetMachine>(T->createTargetMachine( - TheTriple, "", "", Options, Config->Pic ? Reloc::PIC_ : Reloc::Static)); - }; - - std::unique_ptr<TargetMachine> TM = CreateTargetMachine(); - - // Update llvm.compiler.used so that optimizations won't strip - // off AsmUndefinedReferences. 
- updateCompilerUsed(*Combined, *TM, AsmUndefinedRefs); - - if (Config->SaveTemps) - saveBCFile(*Combined, Config->OutputFile + ".lto.bc"); - - runLTOPasses(*Combined, *TM); - if (HasError) - return {}; - - return runSplitCodegen(CreateTargetMachine); + return Ret; } diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h index 81dffb6004b2..b3d734f2d381 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.h +++ b/contrib/llvm/tools/lld/ELF/LTO.h @@ -23,9 +23,14 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/IR/Module.h" -#include "llvm/Linker/IRMover.h" +#include <memory> +#include <vector> + +namespace llvm { +namespace lto { +class LTO; +} +} namespace lld { namespace elf { @@ -36,17 +41,14 @@ class InputFile; class BitcodeCompiler { public: BitcodeCompiler(); + ~BitcodeCompiler(); + void add(BitcodeFile &F); - std::vector<std::unique_ptr<InputFile>> compile(); + std::vector<InputFile *> compile(); private: - std::vector<std::unique_ptr<InputFile>> runSplitCodegen( - const std::function<std::unique_ptr<llvm::TargetMachine>()> &TMFactory); - - std::unique_ptr<llvm::Module> Combined; - std::vector<SmallString<0>> OwningData; - llvm::StringSet<> InternalizedSyms; - llvm::StringSet<> AsmUndefinedRefs; + std::unique_ptr<llvm::lto::LTO> LTOObj; + std::vector<SmallString<0>> Buff; }; } } diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp index 61abdc185e11..5057b57a4a54 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp @@ -8,8 +8,6 @@ //===----------------------------------------------------------------------===// // // This file contains the parser/evaluator of the linker script. -// It does not construct an AST but consume linker script directives directly. -// Results are written to Driver or Config object. 
// //===----------------------------------------------------------------------===// @@ -17,311 +15,971 @@ #include "Config.h" #include "Driver.h" #include "InputSection.h" +#include "Memory.h" #include "OutputSections.h" #include "ScriptParser.h" #include "Strings.h" -#include "Symbols.h" #include "SymbolTable.h" +#include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/StringSaver.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <string> +#include <tuple> +#include <vector> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; +LinkerScriptBase *elf::ScriptBase; ScriptConfiguration *elf::ScriptConfig; -// This is an operator-precedence parser to parse and evaluate -// a linker script expression. For each linker script arithmetic -// expression (e.g. ". = . + 0x1000"), a new instance of ExprParser -// is created and ran. -namespace { -class ExprParser : public ScriptParserBase { -public: - ExprParser(std::vector<StringRef> &Tokens, uint64_t Dot) - : ScriptParserBase(Tokens), Dot(Dot) {} +template <class ELFT> static void addRegular(SymbolAssignment *Cmd) { + uint8_t Visibility = Cmd->Hidden ? 
STV_HIDDEN : STV_DEFAULT; + Symbol *Sym = Symtab<ELFT>::X->addRegular(Cmd->Name, Visibility, STT_NOTYPE, + 0, 0, STB_GLOBAL, nullptr, nullptr); + Cmd->Sym = Sym->body(); - uint64_t run(); + // If we have no SECTIONS then we don't have '.' and don't call + // assignAddresses(). We calculate symbol value immediately in this case. + if (!ScriptConfig->HasSections) + cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Cmd->Expression(0); +} -private: - uint64_t parsePrimary(); - uint64_t parseTernary(uint64_t Cond); - uint64_t apply(StringRef Op, uint64_t L, uint64_t R); - uint64_t parseExpr1(uint64_t Lhs, int MinPrec); - uint64_t parseExpr(); +template <class ELFT> static void addSynthetic(SymbolAssignment *Cmd) { + // If we have SECTIONS block then output sections haven't been created yet. + const OutputSectionBase *Sec = + ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); + Symbol *Sym = Symtab<ELFT>::X->addSynthetic( + Cmd->Name, Sec, 0, Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT); + Cmd->Sym = Sym->body(); - uint64_t Dot; -}; + // If we already know section then we can calculate symbol value immediately. 
+ if (Sec) + cast<DefinedSynthetic>(Cmd->Sym)->Value = Cmd->Expression(0) - Sec->Addr; } -static int precedence(StringRef Op) { - return StringSwitch<int>(Op) - .Case("*", 4) - .Case("/", 4) - .Case("+", 3) - .Case("-", 3) - .Case("<", 2) - .Case(">", 2) - .Case(">=", 2) - .Case("<=", 2) - .Case("==", 2) - .Case("!=", 2) - .Case("&", 1) - .Default(-1); +static bool isUnderSysroot(StringRef Path) { + if (Config->Sysroot == "") + return false; + for (; !Path.empty(); Path = sys::path::parent_path(Path)) + if (sys::fs::equivalent(Config->Sysroot, Path)) + return true; + return false; } -static uint64_t evalExpr(std::vector<StringRef> &Tokens, uint64_t Dot) { - return ExprParser(Tokens, Dot).run(); +template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { + if (Cmd->Expression.IsAbsolute()) + addRegular<ELFT>(Cmd); + else + addSynthetic<ELFT>(Cmd); +} +// If a symbol was in PROVIDE(), we need to define it only when +// it is an undefined symbol. +template <class ELFT> static bool shouldDefine(SymbolAssignment *Cmd) { + if (Cmd->Name == ".") + return false; + if (!Cmd->Provide) + return true; + SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); + return B && B->isUndefined(); } -uint64_t ExprParser::run() { - uint64_t V = parseExpr(); - if (!atEOF() && !Error) - setError("stray token: " + peek()); - return V; +bool SymbolAssignment::classof(const BaseCommand *C) { + return C->Kind == AssignmentKind; } -// This is a part of the operator-precedence parser to evaluate -// arithmetic expressions in SECTIONS command. This function evaluates an -// integer literal, a parenthesized expression, the ALIGN function, -// or the special variable ".". 
-uint64_t ExprParser::parsePrimary() { - StringRef Tok = next(); - if (Tok == ".") - return Dot; - if (Tok == "(") { - uint64_t V = parseExpr(); - expect(")"); - return V; - } - if (Tok == "ALIGN") { - expect("("); - uint64_t V = parseExpr(); - expect(")"); - return alignTo(Dot, V); - } - uint64_t V = 0; - if (Tok.getAsInteger(0, V)) - setError("malformed number: " + Tok); - return V; +bool OutputSectionCommand::classof(const BaseCommand *C) { + return C->Kind == OutputSectionKind; } -uint64_t ExprParser::parseTernary(uint64_t Cond) { - next(); - uint64_t V = parseExpr(); - expect(":"); - uint64_t W = parseExpr(); - return Cond ? V : W; +bool InputSectionDescription::classof(const BaseCommand *C) { + return C->Kind == InputSectionKind; } -uint64_t ExprParser::apply(StringRef Op, uint64_t L, uint64_t R) { - if (Op == "*") - return L * R; - if (Op == "/") { - if (R == 0) { - error("division by zero"); - return 0; - } - return L / R; +bool AssertCommand::classof(const BaseCommand *C) { + return C->Kind == AssertKind; +} + +bool BytesDataCommand::classof(const BaseCommand *C) { + return C->Kind == BytesDataKind; +} + +template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; +template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; + +template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { + if (S->getFile()) + return sys::path::filename(S->getFile()->getName()); + return ""; +} + +template <class ELFT> +bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { + for (InputSectionDescription *ID : Opt.KeptSections) + if (ID->FilePat.match(basename(S))) + for (SectionPattern &P : ID->SectionPatterns) + if (P.SectionPat.match(S->Name)) + return true; + return false; +} + +static bool comparePriority(InputSectionData *A, InputSectionData *B) { + return getPriority(A->Name) < getPriority(B->Name); +} + +static bool compareName(InputSectionData *A, InputSectionData *B) { + return A->Name < B->Name; +} + +static bool 
compareAlignment(InputSectionData *A, InputSectionData *B) { + // ">" is not a mistake. Larger alignments are placed before smaller + // alignments in order to reduce the amount of padding necessary. + // This is compatible with GNU. + return A->Alignment > B->Alignment; +} + +static std::function<bool(InputSectionData *, InputSectionData *)> +getComparator(SortSectionPolicy K) { + switch (K) { + case SortSectionPolicy::Alignment: + return compareAlignment; + case SortSectionPolicy::Name: + return compareName; + case SortSectionPolicy::Priority: + return comparePriority; + default: + llvm_unreachable("unknown sort policy"); } - if (Op == "+") - return L + R; - if (Op == "-") - return L - R; - if (Op == "<") - return L < R; - if (Op == ">") - return L > R; - if (Op == ">=") - return L >= R; - if (Op == "<=") - return L <= R; - if (Op == "==") - return L == R; - if (Op == "!=") - return L != R; - if (Op == "&") - return L & R; - llvm_unreachable("invalid operator"); } -// This is a part of the operator-precedence parser. -// This function assumes that the remaining token stream starts -// with an operator. -uint64_t ExprParser::parseExpr1(uint64_t Lhs, int MinPrec) { - while (!atEOF()) { - // Read an operator and an expression. - StringRef Op1 = peek(); - if (Op1 == "?") - return parseTernary(Lhs); - if (precedence(Op1) < MinPrec) - return Lhs; - next(); - uint64_t Rhs = parsePrimary(); +template <class ELFT> +static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, + ConstraintKind Kind) { + if (Kind == ConstraintKind::NoConstraint) + return true; + bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { + auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); + return Sec->Flags & SHF_WRITE; + }); + return (IsRW && Kind == ConstraintKind::ReadWrite) || + (!IsRW && Kind == ConstraintKind::ReadOnly); +} - // Evaluate the remaining part of the expression first if the - // next operator has greater precedence than the previous one. 
- // For example, if we have read "+" and "3", and if the next - // operator is "*", then we'll evaluate 3 * ... part first. - while (!atEOF()) { - StringRef Op2 = peek(); - if (precedence(Op2) <= precedence(Op1)) - break; - Rhs = parseExpr1(Rhs, precedence(Op2)); +static void sortSections(InputSectionData **Begin, InputSectionData **End, + SortSectionPolicy K) { + if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) + std::stable_sort(Begin, End, getComparator(K)); +} + +// Compute and remember which sections the InputSectionDescription matches. +template <class ELFT> +void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { + // Collects all sections that satisfy constraints of I + // and attach them to I. + for (SectionPattern &Pat : I->SectionPatterns) { + size_t SizeBefore = I->Sections.size(); + + for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { + if (!S->Live || S->Assigned) + continue; + + StringRef Filename = basename(S); + if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) + continue; + if (!Pat.SectionPat.match(S->Name)) + continue; + I->Sections.push_back(S); + S->Assigned = true; } - Lhs = apply(Op1, Lhs, Rhs); + // Sort sections as instructed by SORT-family commands and --sort-section + // option. Because SORT-family commands can be nested at most two depth + // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command + // line option is respected even if a SORT command is given, the exact + // behavior we have here is a bit complicated. Here are the rules. + // + // 1. If two SORT commands are given, --sort-section is ignored. + // 2. If one SORT command is given, and if it is not SORT_NONE, + // --sort-section is handled as an inner SORT command. + // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. + // 4. If no SORT command is given, sort according to --sort-section. 
+ InputSectionData **Begin = I->Sections.data() + SizeBefore; + InputSectionData **End = I->Sections.data() + I->Sections.size(); + if (Pat.SortOuter != SortSectionPolicy::None) { + if (Pat.SortInner == SortSectionPolicy::Default) + sortSections(Begin, End, Config->SortSection); + else + sortSections(Begin, End, Pat.SortInner); + sortSections(Begin, End, Pat.SortOuter); + } } - return Lhs; } -// Reads and evaluates an arithmetic expression. -uint64_t ExprParser::parseExpr() { return parseExpr1(parsePrimary(), 0); } - template <class ELFT> -StringRef LinkerScript<ELFT>::getOutputSection(InputSectionBase<ELFT> *S) { - for (SectionRule &R : Opt.Sections) - if (globMatch(R.SectionPattern, S->getSectionName())) - return R.Dest; - return ""; +void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { + for (InputSectionBase<ELFT> *S : V) { + S->Live = false; + reportDiscarded(S); + } } template <class ELFT> -bool LinkerScript<ELFT>::isDiscarded(InputSectionBase<ELFT> *S) { - return getOutputSection(S) == "/DISCARD/"; +std::vector<InputSectionBase<ELFT> *> +LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { + std::vector<InputSectionBase<ELFT> *> Ret; + + for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { + auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); + if (!Cmd) + continue; + computeInputSections(Cmd); + for (InputSectionData *S : Cmd->Sections) + Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); + } + + return Ret; } template <class ELFT> -bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { - for (StringRef Pat : Opt.KeptSections) - if (globMatch(Pat, S->getSectionName())) - return true; - return false; +static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C, + StringRef OutsecName) { + // When using linker script the merge rules are different. + // Unfortunately, linker scripts are name based. 
This means that expressions + // like *(.foo*) can refer to multiple input sections that would normally be + // placed in different output sections. We cannot put them in different + // output sections or we would produce wrong results for + // start = .; *(.foo.*) end = .; *(.bar) + // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to + // another. The problem is that there is no way to layout those output + // sections such that the .foo sections are the only thing between the + // start and end symbols. + + // An extra annoyance is that we cannot simply disable merging of the contents + // of SHF_MERGE sections, but our implementation requires one output section + // per "kind" (string or not, which size/aligment). + // Fortunately, creating symbols in the middle of a merge section is not + // supported by bfd or gold, so we can just create multiple section in that + // case. + typedef typename ELFT::uint uintX_t; + uintX_t Flags = C->Flags & (SHF_MERGE | SHF_STRINGS); + + uintX_t Alignment = 0; + if (isa<MergeInputSection<ELFT>>(C)) + Alignment = std::max<uintX_t>(C->Alignment, C->Entsize); + + return SectionKey<ELFT::Is64Bits>{OutsecName, /*Type*/ 0, Flags, Alignment}; } template <class ELFT> -void LinkerScript<ELFT>::assignAddresses( - ArrayRef<OutputSectionBase<ELFT> *> Sections) { - // Orphan sections are sections present in the input files which - // are not explicitly placed into the output file by the linker script. - // We place orphan sections at end of file. - // Other linkers places them using some heuristics as described in - // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections. 
- for (OutputSectionBase<ELFT> *Sec : Sections) { - StringRef Name = Sec->getName(); - if (getSectionIndex(Name) == INT_MAX) - Opt.Commands.push_back({SectionKind, {}, Name}); - } +void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, + InputSectionBase<ELFT> *Sec, + StringRef Name) { + OutputSectionBase *OutSec; + bool IsNew; + std::tie(OutSec, IsNew) = Factory.create(createKey(Sec, Name), Sec); + if (IsNew) + OutputSections->push_back(OutSec); + OutSec->addSection(Sec); +} - // Assign addresses as instructed by linker script SECTIONS sub-commands. - Dot = Out<ELFT>::ElfHeader->getSize() + Out<ELFT>::ProgramHeaders->getSize(); - uintX_t MinVA = std::numeric_limits<uintX_t>::max(); - uintX_t ThreadBssOffset = 0; +template <class ELFT> +void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { + for (unsigned I = 0; I < Opt.Commands.size(); ++I) { + auto Iter = Opt.Commands.begin() + I; + const std::unique_ptr<BaseCommand> &Base1 = *Iter; - for (SectionsCommand &Cmd : Opt.Commands) { - if (Cmd.Kind == AssignmentKind) { - uint64_t Val = evalExpr(Cmd.Expr, Dot); + // Handle symbol assignments outside of any output section. + if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { + if (shouldDefine<ELFT>(Cmd)) + addSymbol<ELFT>(Cmd); + continue; + } - if (Cmd.Name == ".") { - Dot = Val; - } else { - auto *D = cast<DefinedRegular<ELFT>>(Symtab<ELFT>::X->find(Cmd.Name)); - D->Value = Val; - } + if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { + // If we don't have SECTIONS then output sections have already been + // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses + // will not be called, so ASSERT should be evaluated now. + if (!Opt.HasSections) + Cmd->Expression(0); continue; } - // Find all the sections with required name. There can be more than - // ont section with such name, if the alignment, flags or type - // attribute differs. 
- assert(Cmd.Kind == SectionKind); - for (OutputSectionBase<ELFT> *Sec : Sections) { - if (Sec->getName() != Cmd.Name) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { + std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); + + // The output section name `/DISCARD/' is special. + // Any input section assigned to it is discarded. + if (Cmd->Name == "/DISCARD/") { + discard(V); continue; + } - if ((Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS) { - uintX_t TVA = Dot + ThreadBssOffset; - TVA = alignTo(TVA, Sec->getAlignment()); - Sec->setVA(TVA); - ThreadBssOffset = TVA - Dot + Sec->getSize(); + // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive + // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input + // sections satisfy a given constraint. If not, a directive is handled + // as if it wasn't present from the beginning. + // + // Because we'll iterate over Commands many more times, the easiest + // way to "make it as if it wasn't present" is to just remove it. + if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { + for (InputSectionBase<ELFT> *S : V) + S->Assigned = false; + Opt.Commands.erase(Iter); + --I; continue; } - if (Sec->getFlags() & SHF_ALLOC) { - Dot = alignTo(Dot, Sec->getAlignment()); - Sec->setVA(Dot); - MinVA = std::min(MinVA, Dot); - Dot += Sec->getSize(); + // A directive may contain symbol definitions like this: + // ".foo : { ...; bar = .; }". Handle them. + for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) + if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) + if (shouldDefine<ELFT>(OutCmd)) + addSymbol<ELFT>(OutCmd); + + // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign + // is given, input sections are aligned to that value, whether the + // given value is larger or smaller than the original section alignment. 
+ if (Cmd->SubalignExpr) { + uint32_t Subalign = Cmd->SubalignExpr(0); + for (InputSectionBase<ELFT> *S : V) + S->Alignment = Subalign; + } + + // Add input sections to an output section. + for (InputSectionBase<ELFT> *S : V) + addSection(Factory, S, Cmd->Name); + } + } +} + +// Add sections that didn't match any sections command. +template <class ELFT> +void LinkerScript<ELFT>::addOrphanSections( + OutputSectionFactory<ELFT> &Factory) { + for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) + if (S->Live && !S->OutSec) + addSection(Factory, S, getOutputSectionName(S->Name)); +} + +// Sets value of a section-defined symbol. Two kinds of +// symbols are processed: synthetic symbols, whose value +// is an offset from beginning of section and regular +// symbols whose value is absolute. +template <class ELFT> +static void assignSectionSymbol(SymbolAssignment *Cmd, + typename ELFT::uint Value) { + if (!Cmd->Sym) + return; + + if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { + Body->Section = Cmd->Expression.Section(); + Body->Value = Cmd->Expression(Value) - Body->Section->Addr; + return; + } + auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); + Body->Value = Cmd->Expression(Value); +} + +template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { + return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; +} + +template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { + if (!AlreadyOutputIS.insert(S).second) + return; + bool IsTbss = isTbss<ELFT>(CurOutSec); + + uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; + Pos = alignTo(Pos, S->Alignment); + S->OutSecOff = Pos - CurOutSec->Addr; + Pos += S->getSize(); + + // Update output section size after adding each section. 
This is so that + // SIZEOF works correctly in the case below: + // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } + CurOutSec->Size = Pos - CurOutSec->Addr; + + if (IsTbss) + ThreadBssOffset = Pos - Dot; + else + Dot = Pos; +} + +template <class ELFT> void LinkerScript<ELFT>::flush() { + if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) + return; + if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { + for (InputSection<ELFT> *I : OutSec->Sections) + output(I); + } else { + Dot += CurOutSec->Size; + } +} + +template <class ELFT> +void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { + if (CurOutSec == Sec) + return; + if (AlreadyOutputOS.count(Sec)) + return; + + flush(); + CurOutSec = Sec; + + Dot = alignTo(Dot, CurOutSec->Addralign); + CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; + + // If neither AT nor AT> is specified for an allocatable section, the linker + // will set the LMA such that the difference between VMA and LMA for the + // section is the same as the preceding output section in the same region + // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html + CurOutSec->setLMAOffset(LMAOffset); +} + +template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { + // This handles the assignments to symbol or to a location counter (.) + if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { + if (AssignCmd->Name == ".") { + // Update to location counter means update to section size. + uintX_t Val = AssignCmd->Expression(Dot); + if (Val < Dot) + error("unable to move location counter backward for: " + + CurOutSec->Name); + Dot = Val; + CurOutSec->Size = Dot - CurOutSec->Addr; + return; + } + assignSectionSymbol<ELFT>(AssignCmd, Dot); + return; + } + + // Handle BYTE(), SHORT(), LONG(), or QUAD(). 
+ if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { + DataCmd->Offset = Dot - CurOutSec->Addr; + Dot += DataCmd->Size; + CurOutSec->Size = Dot - CurOutSec->Addr; + return; + } + + if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { + AssertCmd->Expression(Dot); + return; + } + + // It handles single input section description command, + // calculates and assigns the offsets for each section and also + // updates the output section size. + auto &ICmd = cast<InputSectionDescription>(Base); + for (InputSectionData *ID : ICmd.Sections) { + // We tentatively added all synthetic sections at the beginning and removed + // empty ones afterwards (because there is no way to know whether they were + // going be empty or not other than actually running linker scripts.) + // We need to ignore remains of empty sections. + if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) + if (Sec->empty()) continue; + + auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); + switchTo(IB->OutSec); + if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) + output(I); + else + flush(); + } +} + +template <class ELFT> +static std::vector<OutputSectionBase *> +findSections(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { + std::vector<OutputSectionBase *> Ret; + for (OutputSectionBase *Sec : Sections) + if (Sec->getName() == Name) + Ret.push_back(Sec); + return Ret; +} + +// This function assigns offsets to input sections and an output section +// for a single sections command (e.g. ".text { *(.text); }"). +template <class ELFT> +void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { + if (Cmd->LMAExpr) + LMAOffset = Cmd->LMAExpr(Dot) - Dot; + std::vector<OutputSectionBase *> Sections = + findSections<ELFT>(Cmd->Name, *OutputSections); + if (Sections.empty()) + return; + switchTo(Sections[0]); + + // Find the last section output location. We will output orphan sections + // there so that end symbols point to the correct location. 
+ auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), + [](const std::unique_ptr<BaseCommand> &Cmd) { + return !isa<SymbolAssignment>(*Cmd); + }) + .base(); + for (auto I = Cmd->Commands.begin(); I != E; ++I) + process(**I); + for (OutputSectionBase *Base : Sections) + switchTo(Base); + flush(); + std::for_each(E, Cmd->Commands.end(), + [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); +} + +template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { + // It is common practice to use very generic linker scripts. So for any + // given run some of the output sections in the script will be empty. + // We could create corresponding empty output sections, but that would + // clutter the output. + // We instead remove trivially empty sections. The bfd linker seems even + // more aggressive at removing them. + auto Pos = std::remove_if( + Opt.Commands.begin(), Opt.Commands.end(), + [&](const std::unique_ptr<BaseCommand> &Base) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) + return findSections<ELFT>(Cmd->Name, *OutputSections).empty(); + return false; + }); + Opt.Commands.erase(Pos, Opt.Commands.end()); +} + +static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { + for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) + if (!isa<InputSectionDescription>(*I)) + return false; + return true; +} + +template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { + // If the output section contains only symbol assignments, create a + // corresponding output section. The bfd linker seems to only create them if + // '.' is assigned to, but creating these section should not have any bad + // consequeces and gives us a section to put the symbol in. 
+ uintX_t Flags = SHF_ALLOC; + uint32_t Type = SHT_NOBITS; + for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + if (!Cmd) + continue; + std::vector<OutputSectionBase *> Secs = + findSections<ELFT>(Cmd->Name, *OutputSections); + if (!Secs.empty()) { + Flags = Secs[0]->Flags; + Type = Secs[0]->Type; + continue; + } + + if (isAllSectionDescription(*Cmd)) + continue; + + auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); + OutputSections->push_back(OutSec); + } +} + +template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { + placeOrphanSections(); + + // If output section command doesn't specify any segments, + // and we haven't previously assigned any section to segment, + // then we simply assign section to the very first load segment. + // Below is an example of such linker script: + // PHDRS { seg PT_LOAD; } + // SECTIONS { .aaa : { *(.aaa) } } + std::vector<StringRef> DefPhdrs; + auto FirstPtLoad = + std::find_if(Opt.PhdrsCommands.begin(), Opt.PhdrsCommands.end(), + [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); + if (FirstPtLoad != Opt.PhdrsCommands.end()) + DefPhdrs.push_back(FirstPtLoad->Name); + + // Walk the commands and propagate the program headers to commands that don't + // explicitly specify them. + for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + if (!Cmd) + continue; + if (Cmd->Phdrs.empty()) + Cmd->Phdrs = DefPhdrs; + else + DefPhdrs = Cmd->Phdrs; + } + + removeEmptyCommands(); +} + +// When placing orphan sections, we want to place them after symbol assignments +// so that an orphan after +// begin_foo = .; +// foo : { *(foo) } +// end_foo = .; +// doesn't break the intended meaning of the begin/end symbols. +// We don't want to go over sections since Writer<ELFT>::sortSections is the +// one in charge of deciding the order of the sections. 
+// We don't want to go over alignments, since doing so in +// rx_sec : { *(rx_sec) } +// . = ALIGN(0x1000); +// /* The RW PT_LOAD starts here*/ +// rw_sec : { *(rw_sec) } +// would mean that the RW PT_LOAD would become unaligned. +static bool shouldSkip(const BaseCommand &Cmd) { + if (isa<OutputSectionCommand>(Cmd)) + return false; + const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); + if (!Assign) + return true; + return Assign->Name != "."; +} + +// Orphan sections are sections present in the input files which are not +// explicitly placed into the output file by the linker script. This just +// places them in the order already decided in OutputSections. +template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { + // The OutputSections are already in the correct order. + // This loops creates or moves commands as needed so that they are in the + // correct order. + int CmdIndex = 0; + + // As a horrible special case, skip the first . assignment if it is before any + // section. We do this because it is common to set a load address by starting + // the script with ". = 0xabcd" and the expectation is that every section is + // after that. + auto FirstSectionOrDotAssignment = + std::find_if(Opt.Commands.begin(), Opt.Commands.end(), + [](const std::unique_ptr<BaseCommand> &Cmd) { + if (isa<OutputSectionCommand>(*Cmd)) + return true; + const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); + if (!Assign) + return false; + return Assign->Name == "."; + }); + if (FirstSectionOrDotAssignment != Opt.Commands.end()) { + CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); + if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) + ++CmdIndex; + } + + for (OutputSectionBase *Sec : *OutputSections) { + StringRef Name = Sec->getName(); + + // Find the last spot where we can insert a command and still get the + // correct result. 
+ auto CmdIter = Opt.Commands.begin() + CmdIndex; + auto E = Opt.Commands.end(); + while (CmdIter != E && shouldSkip(**CmdIter)) { + ++CmdIter; + ++CmdIndex; + } + + auto Pos = + std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + return Cmd && Cmd->Name == Name; + }); + if (Pos == E) { + Opt.Commands.insert(CmdIter, + llvm::make_unique<OutputSectionCommand>(Name)); + ++CmdIndex; + continue; + } + + // Continue from where we found it. + CmdIndex = (Pos - Opt.Commands.begin()) + 1; + } +} + +template <class ELFT> +void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { + // Assign addresses as instructed by linker script SECTIONS sub-commands. + Dot = 0; + + for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { + if (Cmd->Name == ".") { + Dot = Cmd->Expression(Dot); + } else if (Cmd->Sym) { + assignSectionSymbol<ELFT>(Cmd, Dot); } + continue; } + + if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { + Cmd->Expression(Dot); + continue; + } + + auto *Cmd = cast<OutputSectionCommand>(Base.get()); + if (Cmd->AddrExpr) + Dot = Cmd->AddrExpr(Dot); + assignOffsets(Cmd); } + uintX_t MinVA = std::numeric_limits<uintX_t>::max(); + for (OutputSectionBase *Sec : *OutputSections) { + if (Sec->Flags & SHF_ALLOC) + MinVA = std::min<uint64_t>(MinVA, Sec->Addr); + else + Sec->Addr = 0; + } + + uintX_t HeaderSize = getHeaderSize(); + // If the linker script doesn't have PHDRS, add ElfHeader and ProgramHeaders + // now that we know we have space. + if (HeaderSize <= MinVA && !hasPhdrsCommands()) + allocateHeaders<ELFT>(Phdrs, *OutputSections); + // ELF and Program headers need to be right before the first section in - // memory. - // Set their addresses accordingly. 
- MinVA = alignDown(MinVA - Out<ELFT>::ElfHeader->getSize() - - Out<ELFT>::ProgramHeaders->getSize(), - Target->PageSize); - Out<ELFT>::ElfHeader->setVA(MinVA); - Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA); + // memory. Set their addresses accordingly. + MinVA = alignDown(MinVA - HeaderSize, Config->MaxPageSize); + Out<ELFT>::ElfHeader->Addr = MinVA; + Out<ELFT>::ProgramHeaders->Addr = Out<ELFT>::ElfHeader->Size + MinVA; +} + +// Creates program headers as instructed by PHDRS linker script command. +template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { + std::vector<PhdrEntry> Ret; + + // Process PHDRS and FILEHDR keywords because they are not + // real output sections and cannot be added in the following loop. + for (const PhdrsCommand &Cmd : Opt.PhdrsCommands) { + Ret.emplace_back(Cmd.Type, Cmd.Flags == UINT_MAX ? PF_R : Cmd.Flags); + PhdrEntry &Phdr = Ret.back(); + + if (Cmd.HasFilehdr) + Phdr.add(Out<ELFT>::ElfHeader); + if (Cmd.HasPhdrs) + Phdr.add(Out<ELFT>::ProgramHeaders); + + if (Cmd.LMAExpr) { + Phdr.p_paddr = Cmd.LMAExpr(0); + Phdr.HasLMA = true; + } + } + + // Add output sections to program headers. + for (OutputSectionBase *Sec : *OutputSections) { + if (!(Sec->Flags & SHF_ALLOC)) + break; + + // Assign headers specified by linker script + for (size_t Id : getPhdrIndices(Sec->getName())) { + Ret[Id].add(Sec); + if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) + Ret[Id].p_flags |= Sec->getPhdrFlags(); + } + } + return Ret; +} + +template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { + // Ignore .interp section in case we have PHDRS specification + // and PT_INTERP isn't listed. 
+ return !Opt.PhdrsCommands.empty() && + llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { + return Cmd.Type == PT_INTERP; + }) == Opt.PhdrsCommands.end(); +} + +template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { + for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) + if (Cmd->Name == Name) + return Cmd->Filler; + return 0; } template <class ELFT> -ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) { - auto I = Opt.Filler.find(Name); - if (I == Opt.Filler.end()) - return {}; - return I->second; +static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { + const endianness E = ELFT::TargetEndianness; + + switch (Size) { + case 1: + *Buf = (uint8_t)Data; + break; + case 2: + write16<E>(Buf, Data); + break; + case 4: + write32<E>(Buf, Data); + break; + case 8: + write64<E>(Buf, Data); + break; + default: + llvm_unreachable("unsupported Size argument"); + } +} + +template <class ELFT> +void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { + int I = getSectionIndex(Name); + if (I == INT_MAX) + return; + + auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); + for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) + if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) + writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); +} + +template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { + for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) + if (Cmd->LMAExpr && Cmd->Name == Name) + return true; + return false; } // Returns the index of the given section name in linker script // SECTIONS commands. Sections are laid out as the same order as they // were in the script. If a given name did not appear in the script, // it returns INT_MAX, so that it will be laid out at end of file. 
+template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { + for (int I = 0, E = Opt.Commands.size(); I != E; ++I) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) + if (Cmd->Name == Name) + return I; + return INT_MAX; +} + +template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { + return !Opt.PhdrsCommands.empty(); +} + template <class ELFT> -int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { - auto Begin = Opt.Commands.begin(); - auto End = Opt.Commands.end(); - auto I = std::find_if(Begin, End, [&](SectionsCommand &N) { - return N.Kind == SectionKind && N.Name == Name; - }); - return I == End ? INT_MAX : (I - Begin); +const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, + StringRef Name) { + static OutputSectionBase FakeSec("", 0, 0); + + for (OutputSectionBase *Sec : *OutputSections) + if (Sec->getName() == Name) + return Sec; + + error(Loc + ": undefined section " + Name); + return &FakeSec; +} + +// This function is essentially the same as getOutputSection(Name)->Size, +// but it won't print out an error message if a given section is not found. +// +// Linker script does not create an output section if its content is empty. +// We want to allow SIZEOF(.foo) where .foo is a section which happened to +// be empty. That is why this function is different from getOutputSection(). +template <class ELFT> +uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { + for (OutputSectionBase *Sec : *OutputSections) + if (Sec->getName() == Name) + return Sec->Size; + return 0; +} + +template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { + return elf::getHeaderSize<ELFT>(); } -// A compartor to sort output sections. Returns -1 or 1 if -// A or B are mentioned in linker script. Otherwise, returns 0. 
template <class ELFT> -int LinkerScript<ELFT>::compareSections(StringRef A, StringRef B) { - int I = getSectionIndex(A); - int J = getSectionIndex(B); - if (I == INT_MAX && J == INT_MAX) - return 0; - return I < J ? -1 : 1; +uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { + if (SymbolBody *B = Symtab<ELFT>::X->find(S)) + return B->getVA<ELFT>(); + error(Loc + ": symbol not found: " + S); + return 0; +} + +template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { + return Symtab<ELFT>::X->find(S) != nullptr; +} + +template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { + SymbolBody *Sym = Symtab<ELFT>::X->find(S); + auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); + return DR && !DR->Section; } +// Gets section symbol belongs to. Symbol "." doesn't belong to any +// specific section but isn't absolute at the same time, so we try +// to find suitable section for it as well. template <class ELFT> -void LinkerScript<ELFT>::addScriptedSymbols() { - for (SectionsCommand &Cmd : Opt.Commands) - if (Cmd.Kind == AssignmentKind) - if (Cmd.Name != "." && Symtab<ELFT>::X->find(Cmd.Name) == nullptr) - Symtab<ELFT>::X->addAbsolute(Cmd.Name, STV_DEFAULT); +const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { + SymbolBody *Sym = Symtab<ELFT>::X->find(S); + if (!Sym) { + if (OutputSections->empty()) + return nullptr; + return CurOutSec ? CurOutSec : (*OutputSections)[0]; + } + + if (auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym)) + return DR->Section ? DR->Section->OutSec : nullptr; + if (auto *DS = dyn_cast_or_null<DefinedSynthetic>(Sym)) + return DS->Section; + + return nullptr; } -class elf::ScriptParser : public ScriptParserBase { +// Returns indices of ELF headers containing specific section, identified +// by Name. Each index is a zero based number of ELF header listed within +// PHDRS {} script block. 
+template <class ELFT> +std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { + for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + if (!Cmd || Cmd->Name != SectionName) + continue; + + std::vector<size_t> Ret; + for (StringRef PhdrName : Cmd->Phdrs) + Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); + return Ret; + } + return {}; +} + +template <class ELFT> +size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { + size_t I = 0; + for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { + if (Cmd.Name == PhdrName) + return I; + ++I; + } + error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); + return 0; +} + +class elf::ScriptParser final : public ScriptParserBase { typedef void (ScriptParser::*Handler)(); public: - ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {} + ScriptParser(MemoryBufferRef MB) + : ScriptParserBase(MB), + IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} - void run(); + void readLinkerScript(); + void readVersionScript(); + void readDynamicList(); private: void addFile(StringRef Path); @@ -331,53 +989,130 @@ private: void readExtern(); void readGroup(); void readInclude(); - void readNothing() {} void readOutput(); void readOutputArch(); void readOutputFormat(); + void readPhdrs(); void readSearchDir(); void readSections(); + void readVersion(); + void readVersionScriptCommand(); - void readLocationCounterValue(); - void readOutputSectionDescription(StringRef OutSec); - void readSymbolAssignment(StringRef Name); - std::vector<StringRef> readSectionsCommandExpr(); + SymbolAssignment *readAssignment(StringRef Name); + BytesDataCommand *readBytesDataCommand(StringRef Tok); + uint32_t readFill(); + OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); + uint32_t readOutputSectionFiller(StringRef Tok); + std::vector<StringRef> readOutputSectionPhdrs(); + 
InputSectionDescription *readInputSectionDescription(StringRef Tok); + StringMatcher readFilePatterns(); + std::vector<SectionPattern> readInputSectionsList(); + InputSectionDescription *readInputSectionRules(StringRef FilePattern); + unsigned readPhdrType(); + SortSectionPolicy readSortKind(); + SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); + SymbolAssignment *readProvideOrAssignment(StringRef Tok); + void readSort(); + Expr readAssert(); + + Expr readExpr(); + Expr readExpr1(Expr Lhs, int MinPrec); + StringRef readParenLiteral(); + Expr readPrimary(); + Expr readTernary(Expr Cond); + Expr readParenExpr(); + + // For parsing version script. + std::vector<SymbolVersion> readVersionExtern(); + void readAnonymousDeclaration(); + void readVersionDeclaration(StringRef VerStr); + std::vector<SymbolVersion> readSymbols(); - const static StringMap<Handler> Cmd; ScriptConfiguration &Opt = *ScriptConfig; - StringSaver Saver = {ScriptConfig->Alloc}; bool IsUnderSysroot; }; -const StringMap<elf::ScriptParser::Handler> elf::ScriptParser::Cmd = { - {"ENTRY", &ScriptParser::readEntry}, - {"EXTERN", &ScriptParser::readExtern}, - {"GROUP", &ScriptParser::readGroup}, - {"INCLUDE", &ScriptParser::readInclude}, - {"INPUT", &ScriptParser::readGroup}, - {"OUTPUT", &ScriptParser::readOutput}, - {"OUTPUT_ARCH", &ScriptParser::readOutputArch}, - {"OUTPUT_FORMAT", &ScriptParser::readOutputFormat}, - {"SEARCH_DIR", &ScriptParser::readSearchDir}, - {"SECTIONS", &ScriptParser::readSections}, - {";", &ScriptParser::readNothing}}; - -void ScriptParser::run() { +void ScriptParser::readDynamicList() { + expect("{"); + readAnonymousDeclaration(); + if (!atEOF()) + setError("EOF expected, but got " + next()); +} + +void ScriptParser::readVersionScript() { + readVersionScriptCommand(); + if (!atEOF()) + setError("EOF expected, but got " + next()); +} + +void ScriptParser::readVersionScriptCommand() { + if (consume("{")) { + readAnonymousDeclaration(); + return; + } + + while 
(!atEOF() && !Error && peek() != "}") { + StringRef VerStr = next(); + if (VerStr == "{") { + setError("anonymous version definition is used in " + "combination with other version definitions"); + return; + } + expect("{"); + readVersionDeclaration(VerStr); + } +} + +void ScriptParser::readVersion() { + expect("{"); + readVersionScriptCommand(); + expect("}"); +} + +void ScriptParser::readLinkerScript() { while (!atEOF()) { StringRef Tok = next(); - if (Handler Fn = Cmd.lookup(Tok)) - (this->*Fn)(); - else + if (Tok == ";") + continue; + + if (Tok == "ASSERT") { + Opt.Commands.emplace_back(new AssertCommand(readAssert())); + } else if (Tok == "ENTRY") { + readEntry(); + } else if (Tok == "EXTERN") { + readExtern(); + } else if (Tok == "GROUP" || Tok == "INPUT") { + readGroup(); + } else if (Tok == "INCLUDE") { + readInclude(); + } else if (Tok == "OUTPUT") { + readOutput(); + } else if (Tok == "OUTPUT_ARCH") { + readOutputArch(); + } else if (Tok == "OUTPUT_FORMAT") { + readOutputFormat(); + } else if (Tok == "PHDRS") { + readPhdrs(); + } else if (Tok == "SEARCH_DIR") { + readSearchDir(); + } else if (Tok == "SECTIONS") { + readSections(); + } else if (Tok == "VERSION") { + readVersion(); + } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { + Opt.Commands.emplace_back(Cmd); + } else { setError("unknown directive: " + Tok); + } } } void ScriptParser::addFile(StringRef S) { if (IsUnderSysroot && S.startswith("/")) { - SmallString<128> Path; - (Config->Sysroot + S).toStringRef(Path); + SmallString<128> PathData; + StringRef Path = (Config->Sysroot + S).toStringRef(PathData); if (sys::fs::exists(Path)) { - Driver->addFile(Saver.save(Path.str())); + Driver->addFile(Saver.save(Path)); return; } } @@ -394,11 +1129,10 @@ void ScriptParser::addFile(StringRef S) { } else if (sys::fs::exists(S)) { Driver->addFile(S); } else { - std::string Path = findFromSearchPaths(S); - if (Path.empty()) - setError("unable to find " + S); + if (Optional<std::string> Path = 
findFromSearchPaths(S)) + Driver->addFile(Saver.save(*Path)); else - Driver->addFile(Saver.save(Path)); + setError("unable to find " + S); } } @@ -406,12 +1140,8 @@ void ScriptParser::readAsNeeded() { expect("("); bool Orig = Config->AsNeeded; Config->AsNeeded = true; - while (!Error) { - StringRef Tok = next(); - if (Tok == ")") - break; - addFile(Tok); - } + while (!Error && !consume(")")) + addFile(unquote(next())); Config->AsNeeded = Orig; } @@ -426,39 +1156,37 @@ void ScriptParser::readEntry() { void ScriptParser::readExtern() { expect("("); - while (!Error) { - StringRef Tok = next(); - if (Tok == ")") - return; - Config->Undefined.push_back(Tok); - } + while (!Error && !consume(")")) + Config->Undefined.push_back(next()); } void ScriptParser::readGroup() { expect("("); - while (!Error) { + while (!Error && !consume(")")) { StringRef Tok = next(); - if (Tok == ")") - return; - if (Tok == "AS_NEEDED") { + if (Tok == "AS_NEEDED") readAsNeeded(); - continue; - } - addFile(Tok); + else + addFile(unquote(Tok)); } } void ScriptParser::readInclude() { - StringRef Tok = next(); + StringRef Tok = unquote(next()); + // https://sourceware.org/binutils/docs/ld/File-Commands.html: + // The file will be searched for in the current directory, and in any + // directory specified with the -L option. 
auto MBOrErr = MemoryBuffer::getFile(Tok); + if (!MBOrErr) + if (Optional<std::string> Path = findFromSearchPaths(Tok)) + MBOrErr = MemoryBuffer::getFile(*Path); if (!MBOrErr) { setError("cannot open " + Tok); return; } - std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; - StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); - std::vector<StringRef> V = tokenize(S); - Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); + MemoryBufferRef MBRef = (*MBOrErr)->getMemBufferRef(); + make<std::unique_ptr<MemoryBuffer>>(std::move(*MBOrErr)); // take MB ownership + tokenize(MBRef); } void ScriptParser::readOutput() { @@ -466,138 +1194,782 @@ void ScriptParser::readOutput() { expect("("); StringRef Tok = next(); if (Config->OutputFile.empty()) - Config->OutputFile = Tok; + Config->OutputFile = unquote(Tok); expect(")"); } void ScriptParser::readOutputArch() { // Error checking only for now. expect("("); - next(); + skip(); expect(")"); } void ScriptParser::readOutputFormat() { // Error checking only for now. expect("("); - next(); + skip(); StringRef Tok = next(); if (Tok == ")") - return; + return; if (Tok != ",") { setError("unexpected token: " + Tok); return; } - next(); + skip(); expect(","); - next(); + skip(); expect(")"); } +void ScriptParser::readPhdrs() { + expect("{"); + while (!Error && !consume("}")) { + StringRef Tok = next(); + Opt.PhdrsCommands.push_back( + {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); + PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); + + PhdrCmd.Type = readPhdrType(); + do { + Tok = next(); + if (Tok == ";") + break; + if (Tok == "FILEHDR") + PhdrCmd.HasFilehdr = true; + else if (Tok == "PHDRS") + PhdrCmd.HasPhdrs = true; + else if (Tok == "AT") + PhdrCmd.LMAExpr = readParenExpr(); + else if (Tok == "FLAGS") { + expect("("); + // Passing 0 for the value of dot is a bit of a hack. It means that + // we accept expressions like ".|1". 
+ PhdrCmd.Flags = readExpr()(0); + expect(")"); + } else + setError("unexpected header attribute: " + Tok); + } while (!Error); + } +} + void ScriptParser::readSearchDir() { expect("("); - Config->SearchPaths.push_back(next()); + StringRef Tok = next(); + if (!Config->Nostdlib) + Config->SearchPaths.push_back(unquote(Tok)); expect(")"); } void ScriptParser::readSections() { - Opt.DoLayout = true; + Opt.HasSections = true; + // -no-rosegment is used to avoid placing read only non-executable sections in + // their own segment. We do the same if SECTIONS command is present in linker + // script. See comment for computeFlags(). + Config->SingleRoRx = true; + expect("{"); - while (!Error && !skip("}")) { - StringRef Tok = peek(); - if (Tok == ".") { - readLocationCounterValue(); - continue; + while (!Error && !consume("}")) { + StringRef Tok = next(); + BaseCommand *Cmd = readProvideOrAssignment(Tok); + if (!Cmd) { + if (Tok == "ASSERT") + Cmd = new AssertCommand(readAssert()); + else + Cmd = readOutputSectionDescription(Tok); } - next(); - if (peek() == "=") - readSymbolAssignment(Tok); - else - readOutputSectionDescription(Tok); + Opt.Commands.emplace_back(Cmd); } } -void ScriptParser::readLocationCounterValue() { - expect("."); - expect("="); - std::vector<StringRef> Expr = readSectionsCommandExpr(); - if (Expr.empty()) - error("error in location counter expression"); - else - Opt.Commands.push_back({AssignmentKind, std::move(Expr), "."}); +static int precedence(StringRef Op) { + return StringSwitch<int>(Op) + .Cases("*", "/", 5) + .Cases("+", "-", 4) + .Cases("<<", ">>", 3) + .Cases("<", "<=", ">", ">=", "==", "!=", 2) + .Cases("&", "|", 1) + .Default(-1); } -void ScriptParser::readOutputSectionDescription(StringRef OutSec) { - Opt.Commands.push_back({SectionKind, {}, OutSec}); - expect(":"); - expect("{"); +StringMatcher ScriptParser::readFilePatterns() { + std::vector<StringRef> V; + while (!Error && !consume(")")) + V.push_back(next()); + return StringMatcher(V); 
+} - while (!Error && !skip("}")) { - StringRef Tok = next(); - if (Tok == "*") { - expect("("); - while (!Error && !skip(")")) - Opt.Sections.emplace_back(OutSec, next()); - } else if (Tok == "KEEP") { +SortSectionPolicy ScriptParser::readSortKind() { + if (consume("SORT") || consume("SORT_BY_NAME")) + return SortSectionPolicy::Name; + if (consume("SORT_BY_ALIGNMENT")) + return SortSectionPolicy::Alignment; + if (consume("SORT_BY_INIT_PRIORITY")) + return SortSectionPolicy::Priority; + if (consume("SORT_NONE")) + return SortSectionPolicy::None; + return SortSectionPolicy::Default; +} + +// Method reads a list of sequence of excluded files and section globs given in +// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ +// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) +// The semantics of that is next: +// * Include .foo.1 from every file. +// * Include .foo.2 from every file but a.o +// * Include .foo.3 from every file but b.o +std::vector<SectionPattern> ScriptParser::readInputSectionsList() { + std::vector<SectionPattern> Ret; + while (!Error && peek() != ")") { + StringMatcher ExcludeFilePat; + if (consume("EXCLUDE_FILE")) { expect("("); - expect("*"); + ExcludeFilePat = readFilePatterns(); + } + + std::vector<StringRef> V; + while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") + V.push_back(next()); + + if (!V.empty()) + Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); + else + setError("section pattern is expected"); + } + return Ret; +} + +// Reads contents of "SECTIONS" directive. That directive contains a +// list of glob patterns for input sections. The grammar is as follows. +// +// <patterns> ::= <section-list> +// | <sort> "(" <section-list> ")" +// | <sort> "(" <sort> "(" <section-list> ")" ")" +// +// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" +// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" +// +// <section-list> is parsed by readInputSectionsList(). 
+InputSectionDescription * +ScriptParser::readInputSectionRules(StringRef FilePattern) { + auto *Cmd = new InputSectionDescription(FilePattern); + expect("("); + while (!Error && !consume(")")) { + SortSectionPolicy Outer = readSortKind(); + SortSectionPolicy Inner = SortSectionPolicy::Default; + std::vector<SectionPattern> V; + if (Outer != SortSectionPolicy::Default) { expect("("); - while (!Error && !skip(")")) { - StringRef Sec = next(); - Opt.Sections.emplace_back(OutSec, Sec); - Opt.KeptSections.push_back(Sec); + Inner = readSortKind(); + if (Inner != SortSectionPolicy::Default) { + expect("("); + V = readInputSectionsList(); + expect(")"); + } else { + V = readInputSectionsList(); } expect(")"); } else { + V = readInputSectionsList(); + } + + for (SectionPattern &Pat : V) { + Pat.SortInner = Inner; + Pat.SortOuter = Outer; + } + + std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); + } + return Cmd; +} + +InputSectionDescription * +ScriptParser::readInputSectionDescription(StringRef Tok) { + // Input section wildcard can be surrounded by KEEP. + // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep + if (Tok == "KEEP") { + expect("("); + StringRef FilePattern = next(); + InputSectionDescription *Cmd = readInputSectionRules(FilePattern); + expect(")"); + Opt.KeptSections.push_back(Cmd); + return Cmd; + } + return readInputSectionRules(Tok); +} + +void ScriptParser::readSort() { + expect("("); + expect("CONSTRUCTORS"); + expect(")"); +} + +Expr ScriptParser::readAssert() { + expect("("); + Expr E = readExpr(); + expect(","); + StringRef Msg = unquote(next()); + expect(")"); + return [=](uint64_t Dot) { + uint64_t V = E(Dot); + if (!V) + error(Msg); + return V; + }; +} + +// Reads a FILL(expr) command. We handle the FILL command as an +// alias for =fillexp section attribute, which is different from +// what GNU linkers do. 
+// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html +uint32_t ScriptParser::readFill() { + expect("("); + uint32_t V = readOutputSectionFiller(next()); + expect(")"); + expect(";"); + return V; +} + +OutputSectionCommand * +ScriptParser::readOutputSectionDescription(StringRef OutSec) { + OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); + Cmd->Location = getCurrentLocation(); + + // Read an address expression. + // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address + if (peek() != ":") + Cmd->AddrExpr = readExpr(); + + expect(":"); + + if (consume("AT")) + Cmd->LMAExpr = readParenExpr(); + if (consume("ALIGN")) + Cmd->AlignExpr = readParenExpr(); + if (consume("SUBALIGN")) + Cmd->SubalignExpr = readParenExpr(); + + // Parse constraints. + if (consume("ONLY_IF_RO")) + Cmd->Constraint = ConstraintKind::ReadOnly; + if (consume("ONLY_IF_RW")) + Cmd->Constraint = ConstraintKind::ReadWrite; + expect("{"); + + while (!Error && !consume("}")) { + StringRef Tok = next(); + if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { + Cmd->Commands.emplace_back(Assignment); + } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { + Cmd->Commands.emplace_back(Data); + } else if (Tok == "ASSERT") { + Cmd->Commands.emplace_back(new AssertCommand(readAssert())); + expect(";"); + } else if (Tok == "FILL") { + Cmd->Filler = readFill(); + } else if (Tok == "SORT") { + readSort(); + } else if (peek() == "(") { + Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); + } else { setError("unknown command " + Tok); } } + Cmd->Phdrs = readOutputSectionPhdrs(); - StringRef Tok = peek(); - if (Tok.startswith("=")) { - if (!Tok.startswith("=0x")) { - setError("filler should be a hexadecimal value"); - return; + if (consume("=")) + Cmd->Filler = readOutputSectionFiller(next()); + else if (peek().startswith("=")) + Cmd->Filler = readOutputSectionFiller(next().drop_front()); + + return Cmd; +} + +// 
Read "=<number>" where <number> is an octal/decimal/hexadecimal number. +// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html +// +// ld.gold is not fully compatible with ld.bfd. ld.bfd handles +// hexstrings as blobs of arbitrary sizes, while ld.gold handles them +// as 32-bit big-endian values. We will do the same as ld.gold does +// because it's simpler than what ld.bfd does. +uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { + uint32_t V; + if (!Tok.getAsInteger(0, V)) + return V; + setError("invalid filler expression: " + Tok); + return 0; +} + +SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { + expect("("); + SymbolAssignment *Cmd = readAssignment(next()); + Cmd->Provide = Provide; + Cmd->Hidden = Hidden; + expect(")"); + expect(";"); + return Cmd; +} + +SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { + SymbolAssignment *Cmd = nullptr; + if (peek() == "=" || peek() == "+=") { + Cmd = readAssignment(Tok); + expect(";"); + } else if (Tok == "PROVIDE") { + Cmd = readProvideHidden(true, false); + } else if (Tok == "HIDDEN") { + Cmd = readProvideHidden(false, true); + } else if (Tok == "PROVIDE_HIDDEN") { + Cmd = readProvideHidden(true, true); + } + return Cmd; +} + +static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { + if (S == ".") + return Dot; + return ScriptBase->getSymbolValue(Loc, S); +} + +static bool isAbsolute(StringRef S) { + if (S == ".") + return false; + return ScriptBase->isAbsolute(S); +} + +SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { + StringRef Op = next(); + Expr E; + assert(Op == "=" || Op == "+="); + if (consume("ABSOLUTE")) { + // The RHS may be something like "ABSOLUTE(.) & 0xff". + // Call readExpr1 to read the whole expression. 
+ E = readExpr1(readParenExpr(), 0); + E.IsAbsolute = [] { return true; }; + } else { + E = readExpr(); + } + if (Op == "+=") { + std::string Loc = getCurrentLocation(); + E = [=](uint64_t Dot) { + return getSymbolValue(Loc, Name, Dot) + E(Dot); + }; + } + return new SymbolAssignment(Name, E); +} + +// This is an operator-precedence parser to parse a linker +// script expression. +Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } + +static Expr combine(StringRef Op, Expr L, Expr R) { + if (Op == "*") + return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; + if (Op == "/") { + return [=](uint64_t Dot) -> uint64_t { + uint64_t RHS = R(Dot); + if (RHS == 0) { + error("division by zero"); + return 0; + } + return L(Dot) / RHS; + }; + } + if (Op == "+") + return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, + [=] { return L.IsAbsolute() && R.IsAbsolute(); }, + [=] { + const OutputSectionBase *S = L.Section(); + return S ? S : R.Section(); + }}; + if (Op == "-") + return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; + if (Op == "<<") + return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; + if (Op == ">>") + return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; + if (Op == "<") + return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; + if (Op == ">") + return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; + if (Op == ">=") + return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; + if (Op == "<=") + return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; + if (Op == "==") + return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; + if (Op == "!=") + return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; + if (Op == "&") + return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; + if (Op == "|") + return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; + llvm_unreachable("invalid operator"); +} + +// This is a part of the operator-precedence parser. This function +// assumes that the remaining token stream starts with an operator. 
+Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { + while (!atEOF() && !Error) { + // Read an operator and an expression. + if (consume("?")) + return readTernary(Lhs); + StringRef Op1 = peek(); + if (precedence(Op1) < MinPrec) + break; + skip(); + Expr Rhs = readPrimary(); + + // Evaluate the remaining part of the expression first if the + // next operator has greater precedence than the previous one. + // For example, if we have read "+" and "3", and if the next + // operator is "*", then we'll evaluate 3 * ... part first. + while (!atEOF()) { + StringRef Op2 = peek(); + if (precedence(Op2) <= precedence(Op1)) + break; + Rhs = readExpr1(Rhs, precedence(Op2)); } - Tok = Tok.substr(3); - Opt.Filler[OutSec] = parseHex(Tok); - next(); + + Lhs = combine(Op1, Lhs, Rhs); } + return Lhs; } -void ScriptParser::readSymbolAssignment(StringRef Name) { - expect("="); - std::vector<StringRef> Expr = readSectionsCommandExpr(); - if (Expr.empty()) - error("error in symbol assignment expression"); - else - Opt.Commands.push_back({AssignmentKind, std::move(Expr), Name}); +uint64_t static getConstant(StringRef S) { + if (S == "COMMONPAGESIZE") + return Target->PageSize; + if (S == "MAXPAGESIZE") + return Config->MaxPageSize; + error("unknown constant: " + S); + return 0; +} + +// Parses Tok as an integer. Returns true if successful. +// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") +// and decimal numbers. Decimal numbers may have "K" (kilo) or +// "M" (mega) prefixes. 
+static bool readInteger(StringRef Tok, uint64_t &Result) { + // Negative number + if (Tok.startswith("-")) { + if (!readInteger(Tok.substr(1), Result)) + return false; + Result = -Result; + return true; + } + + // Hexadecimal + if (Tok.startswith_lower("0x")) + return !Tok.substr(2).getAsInteger(16, Result); + if (Tok.endswith_lower("H")) + return !Tok.drop_back().getAsInteger(16, Result); + + // Decimal + int Suffix = 1; + if (Tok.endswith_lower("K")) { + Suffix = 1024; + Tok = Tok.drop_back(); + } else if (Tok.endswith_lower("M")) { + Suffix = 1024 * 1024; + Tok = Tok.drop_back(); + } + if (Tok.getAsInteger(10, Result)) + return false; + Result *= Suffix; + return true; +} + +BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { + int Size = StringSwitch<unsigned>(Tok) + .Case("BYTE", 1) + .Case("SHORT", 2) + .Case("LONG", 4) + .Case("QUAD", 8) + .Default(-1); + if (Size == -1) + return nullptr; + + return new BytesDataCommand(readParenExpr(), Size); +} + +StringRef ScriptParser::readParenLiteral() { + expect("("); + StringRef Tok = next(); + expect(")"); + return Tok; +} + +Expr ScriptParser::readPrimary() { + if (peek() == "(") + return readParenExpr(); + + StringRef Tok = next(); + std::string Location = getCurrentLocation(); + + if (Tok == "~") { + Expr E = readPrimary(); + return [=](uint64_t Dot) { return ~E(Dot); }; + } + if (Tok == "-") { + Expr E = readPrimary(); + return [=](uint64_t Dot) { return -E(Dot); }; + } + + // Built-in functions are parsed here. + // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
+ if (Tok == "ADDR") { + StringRef Name = readParenLiteral(); + return {[=](uint64_t Dot) { + return ScriptBase->getOutputSection(Location, Name)->Addr; + }, + [=] { return false; }, + [=] { return ScriptBase->getOutputSection(Location, Name); }}; + } + if (Tok == "LOADADDR") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { + return ScriptBase->getOutputSection(Location, Name)->getLMA(); + }; + } + if (Tok == "ASSERT") + return readAssert(); + if (Tok == "ALIGN") { + expect("("); + Expr E = readExpr(); + if (consume(",")) { + Expr E2 = readExpr(); + expect(")"); + return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; + } + expect(")"); + return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; + } + if (Tok == "CONSTANT") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { return getConstant(Name); }; + } + if (Tok == "DEFINED") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; + } + if (Tok == "SEGMENT_START") { + expect("("); + skip(); + expect(","); + Expr E = readExpr(); + expect(")"); + return [=](uint64_t Dot) { return E(Dot); }; + } + if (Tok == "DATA_SEGMENT_ALIGN") { + expect("("); + Expr E = readExpr(); + expect(","); + readExpr(); + expect(")"); + return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; + } + if (Tok == "DATA_SEGMENT_END") { + expect("("); + expect("."); + expect(")"); + return [](uint64_t Dot) { return Dot; }; + } + // GNU linkers implements more complicated logic to handle + // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to + // the next page boundary for simplicity. 
+ if (Tok == "DATA_SEGMENT_RELRO_END") { + expect("("); + readExpr(); + expect(","); + readExpr(); + expect(")"); + return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; + } + if (Tok == "SIZEOF") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; + } + if (Tok == "ALIGNOF") { + StringRef Name = readParenLiteral(); + return [=](uint64_t Dot) { + return ScriptBase->getOutputSection(Location, Name)->Addralign; + }; + } + if (Tok == "SIZEOF_HEADERS") + return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; + + // Tok is a literal number. + uint64_t V; + if (readInteger(Tok, V)) + return [=](uint64_t Dot) { return V; }; + + // Tok is a symbol name. + if (Tok != "." && !isValidCIdentifier(Tok)) + setError("malformed number: " + Tok); + return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, + [=] { return isAbsolute(Tok); }, + [=] { return ScriptBase->getSymbolSection(Tok); }}; } -std::vector<StringRef> ScriptParser::readSectionsCommandExpr() { - std::vector<StringRef> Expr; - while (!Error) { +Expr ScriptParser::readTernary(Expr Cond) { + Expr L = readExpr(); + expect(":"); + Expr R = readExpr(); + return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; +} + +Expr ScriptParser::readParenExpr() { + expect("("); + Expr E = readExpr(); + expect(")"); + return E; +} + +std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { + std::vector<StringRef> Phdrs; + while (!Error && peek().startswith(":")) { StringRef Tok = next(); - if (Tok == ";") + Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); + } + return Phdrs; +} + +// Read a program header type name. The next token must be a +// name of a program header type or a constant (e.g. "0x3"). 
+unsigned ScriptParser::readPhdrType() { + StringRef Tok = next(); + uint64_t Val; + if (readInteger(Tok, Val)) + return Val; + + unsigned Ret = StringSwitch<unsigned>(Tok) + .Case("PT_NULL", PT_NULL) + .Case("PT_LOAD", PT_LOAD) + .Case("PT_DYNAMIC", PT_DYNAMIC) + .Case("PT_INTERP", PT_INTERP) + .Case("PT_NOTE", PT_NOTE) + .Case("PT_SHLIB", PT_SHLIB) + .Case("PT_PHDR", PT_PHDR) + .Case("PT_TLS", PT_TLS) + .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) + .Case("PT_GNU_STACK", PT_GNU_STACK) + .Case("PT_GNU_RELRO", PT_GNU_RELRO) + .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) + .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) + .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) + .Default(-1); + + if (Ret == (unsigned)-1) { + setError("invalid program header type: " + Tok); + return PT_NULL; + } + return Ret; +} + +// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". +void ScriptParser::readAnonymousDeclaration() { + // Read global symbols first. "global:" is default, so if there's + // no label, we assume global symbols. + if (consume("global:") || peek() != "local:") + Config->VersionScriptGlobals = readSymbols(); + + // Next, read local symbols. + if (consume("local:")) { + if (consume("*")) { + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + expect(";"); + } else { + setError("local symbol list for anonymous version is not supported"); + } + } + expect("}"); + expect(";"); +} + +// Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". +void ScriptParser::readVersionDeclaration(StringRef VerStr) { + // Identifiers start at 2 because 0 and 1 are reserved + // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. + uint16_t VersionId = Config->VersionDefinitions.size() + 2; + Config->VersionDefinitions.push_back({VerStr, VersionId}); + + // Read global symbols. + if (consume("global:") || peek() != "local:") + Config->VersionDefinitions.back().Globals = readSymbols(); + + // Read local symbols. 
+ if (consume("local:")) { + if (consume("*")) { + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + expect(";"); + } else { + for (SymbolVersion V : readSymbols()) + Config->VersionScriptLocals.push_back(V); + } + } + expect("}"); + + // Each version may have a parent version. For example, "Ver2" + // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" + // as a parent. This version hierarchy is, probably against your + // instinct, purely for hint; the runtime doesn't care about it + // at all. In LLD, we simply ignore it. + if (peek() != ";") + skip(); + expect(";"); +} + +// Reads a list of symbols for a versions cript. +std::vector<SymbolVersion> ScriptParser::readSymbols() { + std::vector<SymbolVersion> Ret; + for (;;) { + if (consume("extern")) { + for (SymbolVersion V : readVersionExtern()) + Ret.push_back(V); + continue; + } + + if (peek() == "}" || peek() == "local:" || Error) break; - Expr.push_back(Tok); + StringRef Tok = next(); + Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); + expect(";"); } - return Expr; + return Ret; } -static bool isUnderSysroot(StringRef Path) { - if (Config->Sysroot == "") - return false; - for (; !Path.empty(); Path = sys::path::parent_path(Path)) - if (sys::fs::equivalent(Config->Sysroot, Path)) - return true; - return false; +// Reads an "extern C++" directive, e.g., +// "extern "C++" { ns::*; "f(int, double)"; };" +std::vector<SymbolVersion> ScriptParser::readVersionExtern() { + StringRef Tok = next(); + bool IsCXX = Tok == "\"C++\""; + if (!IsCXX && Tok != "\"C\"") + setError("Unknown language"); + expect("{"); + + std::vector<SymbolVersion> Ret; + while (!Error && peek() != "}") { + StringRef Tok = next(); + bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); + Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); + expect(";"); + } + + expect("}"); + expect(";"); + return Ret; } -// Entry point. 
void elf::readLinkerScript(MemoryBufferRef MB) { - StringRef Path = MB.getBufferIdentifier(); - ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).run(); + ScriptParser(MB).readLinkerScript(); +} + +void elf::readVersionScript(MemoryBufferRef MB) { + ScriptParser(MB).readVersionScript(); +} + +void elf::readDynamicList(MemoryBufferRef MB) { + ScriptParser(MB).readDynamicList(); } template class elf::LinkerScript<ELF32LE>; diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.h b/contrib/llvm/tools/lld/ELF/LinkerScript.h index 768f78a66468..505162f0ab43 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.h +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.h @@ -10,86 +10,279 @@ #ifndef LLD_ELF_LINKER_SCRIPT_H #define LLD_ELF_LINKER_SCRIPT_H +#include "Config.h" +#include "Strings.h" +#include "Writer.h" #include "lld/Core/LLVM.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/Support/Allocator.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" +#include <cstddef> +#include <cstdint> +#include <functional> +#include <memory> +#include <vector> namespace lld { namespace elf { +class DefinedCommon; +class ScriptParser; +class SymbolBody; +template <class ELFT> class InputSectionBase; +template <class ELFT> class InputSection; +class OutputSectionBase; +template <class ELFT> class OutputSectionFactory; +class InputSectionData; + +// This represents an expression in the linker script. +// ScriptParser::readExpr reads an expression and returns an Expr. +// Later, we evaluate the expression by calling the function +// with the value of special context variable ".". +struct Expr { + std::function<uint64_t(uint64_t)> Val; + std::function<bool()> IsAbsolute; + + // If expression is section-relative the function below is used + // to get the output section pointer. 
+ std::function<const OutputSectionBase *()> Section; + + uint64_t operator()(uint64_t Dot) const { return Val(Dot); } + operator bool() const { return (bool)Val; } + + Expr(std::function<uint64_t(uint64_t)> Val, std::function<bool()> IsAbsolute, + std::function<const OutputSectionBase *()> Section) + : Val(Val), IsAbsolute(IsAbsolute), Section(Section) {} + template <typename T> + Expr(T V) : Expr(V, [] { return true; }, [] { return nullptr; }) {} + Expr() : Expr(nullptr) {} +}; + // Parses a linker script. Calling this function updates // Config and ScriptConfig. void readLinkerScript(MemoryBufferRef MB); -class ScriptParser; -template <class ELFT> class InputSectionBase; -template <class ELFT> class OutputSectionBase; +// Parses a version script. +void readVersionScript(MemoryBufferRef MB); + +void readDynamicList(MemoryBufferRef MB); + +// This enum is used to implement linker script SECTIONS command. +// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS +enum SectionsCommandKind { + AssignmentKind, // . = expr or <sym> = expr + OutputSectionKind, + InputSectionKind, + AssertKind, // ASSERT(expr) + BytesDataKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) +}; + +struct BaseCommand { + BaseCommand(int K) : Kind(K) {} + + virtual ~BaseCommand() = default; -// This class represents each rule in SECTIONS command. -struct SectionRule { - SectionRule(StringRef D, StringRef S) - : Dest(D), SectionPattern(S) {} + int Kind; +}; + +// This represents ". = <expr>" or "<symbol> = <expr>". +struct SymbolAssignment : BaseCommand { + SymbolAssignment(StringRef Name, Expr E) + : BaseCommand(AssignmentKind), Name(Name), Expression(E) {} + + static bool classof(const BaseCommand *C); + + // The LHS of an expression. Name is either a symbol name or ".". + StringRef Name; + SymbolBody *Sym = nullptr; - StringRef Dest; + // The RHS of an expression. + Expr Expression; - StringRef SectionPattern; + // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 
+ bool Provide = false; + bool Hidden = false; }; -// This enum represents what we can observe in SECTIONS tag of script: -// ExprKind is a location counter change, like ". = . + 0x1000" -// SectionKind is a description of output section, like ".data :..." -enum SectionsCommandKind { SectionKind, AssignmentKind }; +// Linker scripts allow additional constraints to be put on ouput sections. +// If an output section is marked as ONLY_IF_RO, the section is created +// only if its input sections are read-only. Likewise, an output section +// with ONLY_IF_RW is created if all input sections are RW. +enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; + +struct OutputSectionCommand : BaseCommand { + OutputSectionCommand(StringRef Name) + : BaseCommand(OutputSectionKind), Name(Name) {} + + static bool classof(const BaseCommand *C); -struct SectionsCommand { - SectionsCommandKind Kind; - std::vector<StringRef> Expr; StringRef Name; + Expr AddrExpr; + Expr AlignExpr; + Expr LMAExpr; + Expr SubalignExpr; + std::vector<std::unique_ptr<BaseCommand>> Commands; + std::vector<StringRef> Phdrs; + uint32_t Filler = 0; + ConstraintKind Constraint = ConstraintKind::NoConstraint; + std::string Location; }; -// ScriptConfiguration holds linker script parse results. -struct ScriptConfiguration { - // SECTIONS commands. - std::vector<SectionRule> Sections; +// This struct represents one section match pattern in SECTIONS() command. +// It can optionally have negative match pattern for EXCLUDED_FILE command. +// Also it may be surrounded with SORT() command, so contains sorting rules. 
+struct SectionPattern { + SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) + : ExcludedFilePat(Pat1), SectionPat(Pat2) {} + + StringMatcher ExcludedFilePat; + StringMatcher SectionPat; + SortSectionPolicy SortOuter; + SortSectionPolicy SortInner; +}; + +struct InputSectionDescription : BaseCommand { + InputSectionDescription(StringRef FilePattern) + : BaseCommand(InputSectionKind), FilePat(FilePattern) {} - // Section fill attribute for each section. - llvm::StringMap<std::vector<uint8_t>> Filler; + static bool classof(const BaseCommand *C); + StringMatcher FilePat; + + // Input sections that matches at least one of SectionPatterns + // will be associated with this InputSectionDescription. + std::vector<SectionPattern> SectionPatterns; + + std::vector<InputSectionData *> Sections; +}; + +// Represents an ASSERT(). +struct AssertCommand : BaseCommand { + AssertCommand(Expr E) : BaseCommand(AssertKind), Expression(E) {} + + static bool classof(const BaseCommand *C); + + Expr Expression; +}; + +// Represents BYTE(), SHORT(), LONG(), or QUAD(). 
+struct BytesDataCommand : BaseCommand { + BytesDataCommand(Expr E, unsigned Size) + : BaseCommand(BytesDataKind), Expression(E), Size(Size) {} + + static bool classof(const BaseCommand *C); + + Expr Expression; + unsigned Offset; + unsigned Size; +}; + +struct PhdrsCommand { + StringRef Name; + unsigned Type; + bool HasFilehdr; + bool HasPhdrs; + unsigned Flags; + Expr LMAExpr; +}; + +class LinkerScriptBase { +protected: + ~LinkerScriptBase() = default; + +public: + virtual uint64_t getHeaderSize() = 0; + virtual uint64_t getSymbolValue(const Twine &Loc, StringRef S) = 0; + virtual bool isDefined(StringRef S) = 0; + virtual bool isAbsolute(StringRef S) = 0; + virtual const OutputSectionBase *getSymbolSection(StringRef S) = 0; + virtual const OutputSectionBase *getOutputSection(const Twine &Loc, + StringRef S) = 0; + virtual uint64_t getOutputSectionSize(StringRef S) = 0; +}; + +// ScriptConfiguration holds linker script parse results. +struct ScriptConfiguration { // Used to assign addresses to sections. - std::vector<SectionsCommand> Commands; + std::vector<std::unique_ptr<BaseCommand>> Commands; - bool DoLayout = false; + // Used to assign sections to headers. + std::vector<PhdrsCommand> PhdrsCommands; - llvm::BumpPtrAllocator Alloc; + bool HasSections = false; // List of section patterns specified with KEEP commands. They will // be kept even if they are unused and --gc-sections is specified. - std::vector<StringRef> KeptSections; + std::vector<InputSectionDescription *> KeptSections; }; extern ScriptConfiguration *ScriptConfig; // This is a runner of the linker script. 
-template <class ELFT> class LinkerScript { +template <class ELFT> class LinkerScript final : public LinkerScriptBase { typedef typename ELFT::uint uintX_t; public: - StringRef getOutputSection(InputSectionBase<ELFT> *S); - ArrayRef<uint8_t> getFiller(StringRef Name); - bool isDiscarded(InputSectionBase<ELFT> *S); + LinkerScript(); + ~LinkerScript(); + + void processCommands(OutputSectionFactory<ELFT> &Factory); + void addOrphanSections(OutputSectionFactory<ELFT> &Factory); + void removeEmptyCommands(); + void adjustSectionsBeforeSorting(); + void adjustSectionsAfterSorting(); + + std::vector<PhdrEntry> createPhdrs(); + bool ignoreInterpSection(); + + uint32_t getFiller(StringRef Name); + void writeDataBytes(StringRef Name, uint8_t *Buf); + bool hasLMA(StringRef Name); bool shouldKeep(InputSectionBase<ELFT> *S); - void assignAddresses(ArrayRef<OutputSectionBase<ELFT> *> S); - int compareSections(StringRef A, StringRef B); - void addScriptedSymbols(); + void assignOffsets(OutputSectionCommand *Cmd); + void placeOrphanSections(); + void assignAddresses(std::vector<PhdrEntry> &Phdrs); + bool hasPhdrsCommands(); + uint64_t getHeaderSize() override; + uint64_t getSymbolValue(const Twine &Loc, StringRef S) override; + bool isDefined(StringRef S) override; + bool isAbsolute(StringRef S) override; + const OutputSectionBase *getSymbolSection(StringRef S) override; + const OutputSectionBase *getOutputSection(const Twine &Loc, + StringRef S) override; + uint64_t getOutputSectionSize(StringRef S) override; + + std::vector<OutputSectionBase *> *OutputSections; + + int getSectionIndex(StringRef Name); private: + void computeInputSections(InputSectionDescription *); + + void addSection(OutputSectionFactory<ELFT> &Factory, + InputSectionBase<ELFT> *Sec, StringRef Name); + void discard(ArrayRef<InputSectionBase<ELFT> *> V); + + std::vector<InputSectionBase<ELFT> *> + createInputSectionList(OutputSectionCommand &Cmd); + // "ScriptConfig" is a bit too long, so define a short name for 
it. ScriptConfiguration &Opt = *ScriptConfig; - int getSectionIndex(StringRef Name); + std::vector<size_t> getPhdrIndices(StringRef SectionName); + size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName); uintX_t Dot; + uintX_t LMAOffset = 0; + OutputSectionBase *CurOutSec = nullptr; + uintX_t ThreadBssOffset = 0; + void switchTo(OutputSectionBase *Sec); + void flush(); + void output(InputSection<ELFT> *Sec); + void process(BaseCommand &Base); + llvm::DenseSet<OutputSectionBase *> AlreadyOutputOS; + llvm::DenseSet<InputSectionData *> AlreadyOutputIS; }; // Variable template is a C++14 feature, so we can't template @@ -97,7 +290,9 @@ private: template <class ELFT> struct Script { static LinkerScript<ELFT> *X; }; template <class ELFT> LinkerScript<ELFT> *Script<ELFT>::X; -} // namespace elf -} // namespace lld +extern LinkerScriptBase *ScriptBase; + +} // end namespace elf +} // end namespace lld -#endif +#endif // LLD_ELF_LINKER_SCRIPT_H diff --git a/contrib/llvm/tools/lld/ELF/MarkLive.cpp b/contrib/llvm/tools/lld/ELF/MarkLive.cpp index 41e30ce599d2..8d129fc3ff13 100644 --- a/contrib/llvm/tools/lld/ELF/MarkLive.cpp +++ b/contrib/llvm/tools/lld/ELF/MarkLive.cpp @@ -36,22 +36,24 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; +namespace { // A resolved relocation. The Sec and Offset fields are set if the relocation // was resolved to an offset within a section. 
-template <class ELFT> -struct ResolvedReloc { +template <class ELFT> struct ResolvedReloc { InputSectionBase<ELFT> *Sec; typename ELFT::uint Offset; }; +} // end anonymous namespace template <class ELFT> static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, const typename ELFT::Rel &Rel) { - return Target->getImplicitAddend(Sec.getSectionData().begin(), + return Target->getImplicitAddend(Sec.Data.begin() + Rel.r_offset, Rel.getType(Config->Mips64EL)); } @@ -74,56 +76,103 @@ static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec, return {D->Section->Repl, Offset}; } -template <class ELFT, class Elf_Shdr> -static void run(ELFFile<ELFT> &Obj, InputSectionBase<ELFT> &Sec, - Elf_Shdr *RelSec, std::function<void(ResolvedReloc<ELFT>)> Fn) { - if (RelSec->sh_type == SHT_RELA) { - for (const typename ELFT::Rela &RI : Obj.relas(RelSec)) - Fn(resolveReloc(Sec, RI)); - } else { - for (const typename ELFT::Rel &RI : Obj.rels(RelSec)) - Fn(resolveReloc(Sec, RI)); - } -} - // Calls Fn for each section that Sec refers to via relocations. template <class ELFT> static void forEachSuccessor(InputSection<ELFT> &Sec, std::function<void(ResolvedReloc<ELFT>)> Fn) { - ELFFile<ELFT> &Obj = Sec.getFile()->getObj(); - for (const typename ELFT::Shdr *RelSec : Sec.RelocSections) - run(Obj, Sec, RelSec, Fn); + if (Sec.AreRelocsRela) { + for (const typename ELFT::Rela &Rel : Sec.relas()) + Fn(resolveReloc(Sec, Rel)); + } else { + for (const typename ELFT::Rel &Rel : Sec.rels()) + Fn(resolveReloc(Sec, Rel)); + } + if (Sec.DependentSection) + Fn({Sec.DependentSection, 0}); +} + +// The .eh_frame section is an unfortunate special case. +// The section is divided in CIEs and FDEs and the relocations it can have are +// * CIEs can refer to a personality function. 
+// * FDEs can refer to a LSDA +// * FDEs refer to the function they contain information about +// The last kind of relocation cannot keep the referred section alive, or they +// would keep everything alive in a common object file. In fact, each FDE is +// alive if the section it refers to is alive. +// To keep things simple, in here we just ignore the last relocation kind. The +// other two keep the referred section alive. +// +// A possible improvement would be to fully process .eh_frame in the middle of +// the gc pass. With that we would be able to also gc some sections holding +// LSDAs and personality functions if we found that they were unused. +template <class ELFT, class RelTy> +static void +scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, + std::function<void(ResolvedReloc<ELFT>)> Enqueue) { + const endianness E = ELFT::TargetEndianness; + for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) { + EhSectionPiece &Piece = EH.Pieces[I]; + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + continue; + if (read32<E>(Piece.data().data() + 4) == 0) { + // This is a CIE, we only need to worry about the first relocation. It is + // known to point to the personality function. + Enqueue(resolveReloc(EH, Rels[FirstRelI])); + continue; + } + // This is a FDE. The relocations point to the described function or to + // a LSDA. We only need to keep the LSDA alive, so ignore anything that + // points to executable sections. 
+ typename ELFT::uint PieceEnd = Piece.InputOff + Piece.size(); + for (unsigned I2 = FirstRelI, N2 = Rels.size(); I2 < N2; ++I2) { + const RelTy &Rel = Rels[I2]; + if (Rel.r_offset >= PieceEnd) + break; + ResolvedReloc<ELFT> R = resolveReloc(EH, Rels[I2]); + if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) + continue; + if (R.Sec->Flags & SHF_EXECINSTR) + continue; + Enqueue({R.Sec, 0}); + } + } } template <class ELFT> -static void scanEhFrameSection(EhInputSection<ELFT> &EH, - std::function<void(ResolvedReloc<ELFT>)> Fn) { - if (!EH.RelocSection) +static void +scanEhFrameSection(EhInputSection<ELFT> &EH, + std::function<void(ResolvedReloc<ELFT>)> Enqueue) { + if (!EH.NumRelocations) return; - ELFFile<ELFT> &EObj = EH.getFile()->getObj(); - run<ELFT>(EObj, EH, EH.RelocSection, [&](ResolvedReloc<ELFT> R) { - if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) - return; - if (R.Sec->getSectionHdr()->sh_flags & SHF_EXECINSTR) - return; - Fn({R.Sec, 0}); - }); + + // Unfortunately we need to split .eh_frame early since some relocations in + // .eh_frame keep other section alive and some don't. + EH.split(); + + if (EH.AreRelocsRela) + scanEhFrameSection(EH, EH.relas(), Enqueue); + else + scanEhFrameSection(EH, EH.rels(), Enqueue); } -// Sections listed below are special because they are used by the loader -// just by being in an ELF file. They should not be garbage-collected. 
+// We do not garbage-collect two types of sections: +// 1) Sections used by the loader (.init, .fini, .ctors, .dtors or .jcr) +// 2) Non-allocatable sections which typically contain debugging information template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { - switch (Sec->getSectionHdr()->sh_type) { + switch (Sec->Type) { case SHT_FINI_ARRAY: case SHT_INIT_ARRAY: case SHT_NOTE: case SHT_PREINIT_ARRAY: return true; default: - StringRef S = Sec->getSectionName(); + if (!(Sec->Flags & SHF_ALLOC)) + return true; // We do not want to reclaim sections if they can be referred // by __start_* and __stop_* symbols. + StringRef S = Sec->Name; if (isValidCIdentifier(S)) return true; @@ -140,7 +189,15 @@ template <class ELFT> void elf::markLive() { SmallVector<InputSection<ELFT> *, 256> Q; auto Enqueue = [&](ResolvedReloc<ELFT> R) { - if (!R.Sec) + // Skip over discarded sections. This in theory shouldn't happen, because + // the ELF spec doesn't allow a relocation to point to a deduplicated + // COMDAT section directly. Unfortunately this happens in practice (e.g. + // .eh_frame) so we need to add a check. + if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) + return; + + // We don't gc non alloc sections. + if (!(R.Sec->Flags & SHF_ALLOC)) return; // Usually, a whole section is marked as live or dead, but in mergeable @@ -152,6 +209,7 @@ template <class ELFT> void elf::markLive() { if (R.Sec->Live) return; R.Sec->Live = true; + // Add input section to the queue. if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec)) Q.push_back(S); }; @@ -162,8 +220,7 @@ template <class ELFT> void elf::markLive() { }; // Add GC root symbols. 
- if (Config->EntrySym) - MarkSymbol(Config->EntrySym->body()); + MarkSymbol(Symtab<ELFT>::X->find(Config->Entry)); MarkSymbol(Symtab<ELFT>::X->find(Config->Init)); MarkSymbol(Symtab<ELFT>::X->find(Config->Fini)); for (StringRef S : Config->Undefined) @@ -177,18 +234,15 @@ template <class ELFT> void elf::markLive() { // Preserve special sections and those which are specified in linker // script KEEP command. - for (const std::unique_ptr<ObjectFile<ELFT>> &F : - Symtab<ELFT>::X->getObjectFiles()) - for (InputSectionBase<ELFT> *Sec : F->getSections()) - if (Sec && Sec != &InputSection<ELFT>::Discarded) { - // .eh_frame is always marked as live now, but also it can reference to - // sections that contain personality. We preserve all non-text sections - // referred by .eh_frame here. - if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec)) - scanEhFrameSection<ELFT>(*EH, Enqueue); - if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec)) - Enqueue({Sec, 0}); - } + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + // .eh_frame is always marked as live now, but also it can reference to + // sections that contain personality. We preserve all non-text sections + // referred by .eh_frame here. + if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec)) + scanEhFrameSection<ELFT>(*EH, Enqueue); + if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec)) + Enqueue({Sec, 0}); + } // Mark all reachable sections. while (!Q.empty()) diff --git a/contrib/llvm/tools/lld/ELF/Memory.h b/contrib/llvm/tools/lld/ELF/Memory.h new file mode 100644 index 000000000000..e5a04ed1e5a8 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Memory.h @@ -0,0 +1,67 @@ +//===- Memory.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines arena allocators. +// +// Almost all large objects, such as files, sections or symbols, are +// used for the entire lifetime of the linker once they are created. +// This usage characteristic makes arena allocator an attractive choice +// where the entire linker is one arena. With an arena, newly created +// objects belong to the arena and freed all at once when everything is done. +// Arena allocators are efficient and easy to understand. +// Most objects are allocated using the arena allocators defined by this file. +// +// If you edit this file, please edit COFF/Memory.h too. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_MEMORY_H +#define LLD_ELF_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { +namespace elf { + +// Use this arena if your object doesn't have a destructor. +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +// These two classes are hack to keep track of all +// SpecificBumpPtrAllocator instances. +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +// Use this arena if your object has a destructor. +// Your destructor will be invoked from freeArena(). +template <typename T, typename... U> T *make(U &&... 
Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Mips.cpp b/contrib/llvm/tools/lld/ELF/Mips.cpp new file mode 100644 index 000000000000..ac65672b70fc --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Mips.cpp @@ -0,0 +1,369 @@ +//===- Mips.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains a helper function for the Writer. +// +//===---------------------------------------------------------------------===// + +#include "Error.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Writer.h" + +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/MipsABIFlags.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elf; + +namespace { +struct ArchTreeEdge { + uint32_t Child; + uint32_t Parent; +}; + +struct FileFlags { + StringRef Filename; + uint32_t Flags; +}; +} + +static StringRef getAbiName(uint32_t Flags) { + switch (Flags) { + case 0: + return "n64"; + case EF_MIPS_ABI2: + return "n32"; + case EF_MIPS_ABI_O32: + return "o32"; + case EF_MIPS_ABI_O64: + return "o64"; + case EF_MIPS_ABI_EABI32: + return "eabi32"; + case EF_MIPS_ABI_EABI64: + return "eabi64"; + default: + return "unknown"; + } +} + +static StringRef getNanName(bool IsNan2008) { + return IsNan2008 ? "2008" : "legacy"; +} + +static StringRef getFpName(bool IsFp64) { return IsFp64 ? 
"64" : "32"; } + +static void checkFlags(ArrayRef<FileFlags> Files) { + uint32_t ABI = Files[0].Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); + bool Nan = Files[0].Flags & EF_MIPS_NAN2008; + bool Fp = Files[0].Flags & EF_MIPS_FP64; + + for (const FileFlags &F : Files.slice(1)) { + uint32_t ABI2 = F.Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); + if (ABI != ABI2) + error("target ABI '" + getAbiName(ABI) + "' is incompatible with '" + + getAbiName(ABI2) + "': " + F.Filename); + + bool Nan2 = F.Flags & EF_MIPS_NAN2008; + if (Nan != Nan2) + error("target -mnan=" + getNanName(Nan) + " is incompatible with -mnan=" + + getNanName(Nan2) + ": " + F.Filename); + + bool Fp2 = F.Flags & EF_MIPS_FP64; + if (Fp != Fp2) + error("target -mfp" + getFpName(Fp) + " is incompatible with -mfp" + + getFpName(Fp2) + ": " + F.Filename); + } +} + +static uint32_t getMiscFlags(ArrayRef<FileFlags> Files) { + uint32_t Ret = 0; + for (const FileFlags &F : Files) + Ret |= F.Flags & + (EF_MIPS_ABI | EF_MIPS_ABI2 | EF_MIPS_ARCH_ASE | EF_MIPS_NOREORDER | + EF_MIPS_MICROMIPS | EF_MIPS_NAN2008 | EF_MIPS_32BITMODE); + return Ret; +} + +static uint32_t getPicFlags(ArrayRef<FileFlags> Files) { + // Check PIC/non-PIC compatibility. + bool IsPic = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + for (const FileFlags &F : Files.slice(1)) { + bool IsPic2 = F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + if (IsPic && !IsPic2) + warn("linking abicalls code with non-abicalls file: " + F.Filename); + if (!IsPic && IsPic2) + warn("linking non-abicalls code with abicalls file: " + F.Filename); + } + + // Compute the result PIC/non-PIC flag. + uint32_t Ret = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + for (const FileFlags &F : Files.slice(1)) + Ret &= F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + + // PIC code is inherently CPIC and may not set CPIC flag explicitly. 
+ if (Ret & EF_MIPS_PIC) + Ret |= EF_MIPS_CPIC; + return Ret; +} + +static ArchTreeEdge ArchTree[] = { + // MIPS32R6 and MIPS64R6 are not compatible with other extensions + // MIPS64R2 extensions. + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON3, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON2, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_OCTEON, EF_MIPS_ARCH_64R2}, + {EF_MIPS_ARCH_64R2 | EF_MIPS_MACH_LS3A, EF_MIPS_ARCH_64R2}, + // MIPS64 extensions. + {EF_MIPS_ARCH_64 | EF_MIPS_MACH_SB1, EF_MIPS_ARCH_64}, + {EF_MIPS_ARCH_64 | EF_MIPS_MACH_XLR, EF_MIPS_ARCH_64}, + {EF_MIPS_ARCH_64R2, EF_MIPS_ARCH_64}, + // MIPS V extensions. + {EF_MIPS_ARCH_64, EF_MIPS_ARCH_5}, + // R5000 extensions. + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_5500, EF_MIPS_ARCH_4 | EF_MIPS_MACH_5400}, + // MIPS IV extensions. + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_5400, EF_MIPS_ARCH_4}, + {EF_MIPS_ARCH_4 | EF_MIPS_MACH_9000, EF_MIPS_ARCH_4}, + {EF_MIPS_ARCH_5, EF_MIPS_ARCH_4}, + // VR4100 extensions. + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4111, EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4120, EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100}, + // MIPS III extensions. + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4010, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4100, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_4650, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_5900, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_LS2E, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_3 | EF_MIPS_MACH_LS2F, EF_MIPS_ARCH_3}, + {EF_MIPS_ARCH_4, EF_MIPS_ARCH_3}, + // MIPS32 extensions. + {EF_MIPS_ARCH_32R2, EF_MIPS_ARCH_32}, + // MIPS II extensions. + {EF_MIPS_ARCH_3, EF_MIPS_ARCH_2}, + {EF_MIPS_ARCH_32, EF_MIPS_ARCH_2}, + // MIPS I extensions. 
+ {EF_MIPS_ARCH_1 | EF_MIPS_MACH_3900, EF_MIPS_ARCH_1}, + {EF_MIPS_ARCH_2, EF_MIPS_ARCH_1}, +}; + +static bool isArchMatched(uint32_t New, uint32_t Res) { + if (New == Res) + return true; + if (New == EF_MIPS_ARCH_32 && isArchMatched(EF_MIPS_ARCH_64, Res)) + return true; + if (New == EF_MIPS_ARCH_32R2 && isArchMatched(EF_MIPS_ARCH_64R2, Res)) + return true; + for (const auto &Edge : ArchTree) { + if (Res == Edge.Child) { + Res = Edge.Parent; + if (Res == New) + return true; + } + } + return false; +} + +static StringRef getMachName(uint32_t Flags) { + switch (Flags & EF_MIPS_MACH) { + case EF_MIPS_MACH_NONE: + return ""; + case EF_MIPS_MACH_3900: + return "r3900"; + case EF_MIPS_MACH_4010: + return "r4010"; + case EF_MIPS_MACH_4100: + return "r4100"; + case EF_MIPS_MACH_4650: + return "r4650"; + case EF_MIPS_MACH_4120: + return "r4120"; + case EF_MIPS_MACH_4111: + return "r4111"; + case EF_MIPS_MACH_5400: + return "vr5400"; + case EF_MIPS_MACH_5900: + return "vr5900"; + case EF_MIPS_MACH_5500: + return "vr5500"; + case EF_MIPS_MACH_9000: + return "rm9000"; + case EF_MIPS_MACH_LS2E: + return "loongson2e"; + case EF_MIPS_MACH_LS2F: + return "loongson2f"; + case EF_MIPS_MACH_LS3A: + return "loongson3a"; + case EF_MIPS_MACH_OCTEON: + return "octeon"; + case EF_MIPS_MACH_OCTEON2: + return "octeon2"; + case EF_MIPS_MACH_OCTEON3: + return "octeon3"; + case EF_MIPS_MACH_SB1: + return "sb1"; + case EF_MIPS_MACH_XLR: + return "xlr"; + default: + return "unknown machine"; + } +} + +static StringRef getArchName(uint32_t Flags) { + StringRef S = getMachName(Flags); + if (!S.empty()) + return S; + + switch (Flags & EF_MIPS_ARCH) { + case EF_MIPS_ARCH_1: + return "mips1"; + case EF_MIPS_ARCH_2: + return "mips2"; + case EF_MIPS_ARCH_3: + return "mips3"; + case EF_MIPS_ARCH_4: + return "mips4"; + case EF_MIPS_ARCH_5: + return "mips5"; + case EF_MIPS_ARCH_32: + return "mips32"; + case EF_MIPS_ARCH_64: + return "mips64"; + case EF_MIPS_ARCH_32R2: + return "mips32r2"; + case 
EF_MIPS_ARCH_64R2: + return "mips64r2"; + case EF_MIPS_ARCH_32R6: + return "mips32r6"; + case EF_MIPS_ARCH_64R6: + return "mips64r6"; + default: + return "unknown arch"; + } +} + +// There are (arguably too) many MIPS ISAs out there. Their relationships +// can be represented as a forest. If all input files have ISAs which +// are reachable by repeated proceeding from the single child to the parent, +// these input files are compatible. In that case we need to return "highest" +// ISA. If there are incompatible input files, we show an error. +// For example, mips1 is a "parent" of mips2 and such files are compatible. +// Output file gets EF_MIPS_ARCH_2 flag. On the other hand mips3 and mips32 +// are incompatible because neither mips3 is a parent of mips32 nor mips32 +// is a parent of mips3. +static uint32_t getArchFlags(ArrayRef<FileFlags> Files) { + uint32_t Ret = Files[0].Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); + + for (const FileFlags &F : Files.slice(1)) { + uint32_t New = F.Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); + + // Check ISA compatibility. 
+ if (isArchMatched(New, Ret)) + continue; + if (!isArchMatched(Ret, New)) { + error("target ISA '" + getArchName(Ret) + "' is incompatible with '" + + getArchName(New) + "': " + F.Filename); + return 0; + } + Ret = New; + } + return Ret; +} + +template <class ELFT> uint32_t elf::getMipsEFlags() { + std::vector<FileFlags> V; + for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) + V.push_back({F->getName(), F->getObj().getHeader()->e_flags}); + if (V.empty()) + return 0; + checkFlags(V); + return getMiscFlags(V) | getPicFlags(V) | getArchFlags(V); +} + +static int compareMipsFpAbi(uint8_t FpA, uint8_t FpB) { + if (FpA == FpB) + return 0; + if (FpB == Mips::Val_GNU_MIPS_ABI_FP_ANY) + return 1; + if (FpB == Mips::Val_GNU_MIPS_ABI_FP_64A && + FpA == Mips::Val_GNU_MIPS_ABI_FP_64) + return 1; + if (FpB != Mips::Val_GNU_MIPS_ABI_FP_XX) + return -1; + if (FpA == Mips::Val_GNU_MIPS_ABI_FP_DOUBLE || + FpA == Mips::Val_GNU_MIPS_ABI_FP_64 || + FpA == Mips::Val_GNU_MIPS_ABI_FP_64A) + return 1; + return -1; +} + +static StringRef getMipsFpAbiName(uint8_t FpAbi) { + switch (FpAbi) { + case Mips::Val_GNU_MIPS_ABI_FP_ANY: + return "any"; + case Mips::Val_GNU_MIPS_ABI_FP_DOUBLE: + return "-mdouble-float"; + case Mips::Val_GNU_MIPS_ABI_FP_SINGLE: + return "-msingle-float"; + case Mips::Val_GNU_MIPS_ABI_FP_SOFT: + return "-msoft-float"; + case Mips::Val_GNU_MIPS_ABI_FP_OLD_64: + return "-mips32r2 -mfp64 (old)"; + case Mips::Val_GNU_MIPS_ABI_FP_XX: + return "-mfpxx"; + case Mips::Val_GNU_MIPS_ABI_FP_64: + return "-mgp32 -mfp64"; + case Mips::Val_GNU_MIPS_ABI_FP_64A: + return "-mgp32 -mfp64 -mno-odd-spreg"; + default: + return "unknown"; + } +} + +uint8_t elf::getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, + StringRef FileName) { + if (compareMipsFpAbi(NewFlag, OldFlag) >= 0) + return NewFlag; + if (compareMipsFpAbi(OldFlag, NewFlag) < 0) + error("target floating point ABI '" + getMipsFpAbiName(OldFlag) + + "' is incompatible with '" + getMipsFpAbiName(NewFlag) + "': 
" + + FileName); + return OldFlag; +} + +template <class ELFT> static bool isN32Abi(const InputFile *F) { + if (auto *EF = dyn_cast<ELFFileBase<ELFT>>(F)) + return EF->getObj().getHeader()->e_flags & EF_MIPS_ABI2; + return false; +} + +bool elf::isMipsN32Abi(const InputFile *F) { + switch (Config->EKind) { + case ELF32LEKind: + return isN32Abi<ELF32LE>(F); + case ELF32BEKind: + return isN32Abi<ELF32BE>(F); + case ELF64LEKind: + return isN32Abi<ELF64LE>(F); + case ELF64BEKind: + return isN32Abi<ELF64BE>(F); + default: + llvm_unreachable("unknown Config->EKind"); + } +} + +template uint32_t elf::getMipsEFlags<ELF32LE>(); +template uint32_t elf::getMipsEFlags<ELF32BE>(); +template uint32_t elf::getMipsEFlags<ELF64LE>(); +template uint32_t elf::getMipsEFlags<ELF64BE>(); diff --git a/contrib/llvm/tools/lld/ELF/Options.td b/contrib/llvm/tools/lld/ELF/Options.td index 010f37687f03..d436f056d013 100644 --- a/contrib/llvm/tools/lld/ELF/Options.td +++ b/contrib/llvm/tools/lld/ELF/Options.td @@ -7,6 +7,8 @@ class J<string name>: Joined<["--", "-"], name>; class S<string name>: Separate<["--", "-"], name>; class JS<string name>: JoinedOrSeparate<["--", "-"], name>; +def auxiliary: S<"auxiliary">, HelpText<"Set DT_AUXILIARY field to the specified name">; + def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">; def Bsymbolic_functions: F<"Bsymbolic-functions">, @@ -25,12 +27,24 @@ def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">, def O: Joined<["-"], "O">, HelpText<"Optimize output file size">; +def Tbss: S<"Tbss">, HelpText<"Same as --section-start with .bss as the sectionname">; + +def Tdata: S<"Tdata">, HelpText<"Same as --section-start with .data as the sectionname">; + +def Ttext: S<"Ttext">, HelpText<"Same as --section-start with .text as the sectionname">; + def allow_multiple_definition: F<"allow-multiple-definition">, HelpText<"Allow multiple definitions">; def as_needed: F<"as-needed">, HelpText<"Only set DT_NEEDED for shared libraries if 
used">; +def color_diagnostics: F<"color-diagnostics">, + HelpText<"Use colors in diagnostics">; + +def color_diagnostics_eq: J<"color-diagnostics=">, + HelpText<"Use colors in diagnostics">; + def disable_new_dtags: F<"disable-new-dtags">, HelpText<"Disable new dynamic tags">; @@ -60,6 +74,9 @@ def end_lib: F<"end-lib">, def entry: S<"entry">, MetaVarName<"<entry>">, HelpText<"Name of entry point symbol">; +def error_limit: S<"error-limit">, + HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; + def export_dynamic: F<"export-dynamic">, HelpText<"Put symbols in the dynamic symbol table">; @@ -72,6 +89,18 @@ def fatal_warnings: F<"fatal-warnings">, def fini: S<"fini">, MetaVarName<"<symbol>">, HelpText<"Specify a finalizer function">; +def full_shutdown : F<"full-shutdown">, + HelpText<"Perform a full shutdown instead of calling _exit">; + +def format: J<"format=">, MetaVarName<"<input-format>">, + HelpText<"Change the input format of the inputs following this option">; + +def gc_sections: F<"gc-sections">, + HelpText<"Enable garbage collection of unused sections">; + +def gdb_index: F<"gdb-index">, + HelpText<"Generate .gdb_index section">; + def hash_style: S<"hash-style">, HelpText<"Specify hash style (sysv, gnu or both)">; @@ -81,9 +110,6 @@ def icf: F<"icf=all">, HelpText<"Enable identical code folding">; def image_base : J<"image-base=">, HelpText<"Set the base address">; -def gc_sections: F<"gc-sections">, - HelpText<"Enable garbage collection of unused sections">; - def init: S<"init">, MetaVarName<"<symbol>">, HelpText<"Specify an initializer function">; @@ -95,21 +121,37 @@ def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; +def nostdlib: F<"nostdlib">, + HelpText<"Only search directories specified on the command line">; + def no_as_needed: F<"no-as-needed">, HelpText<"Always DT_NEEDED for shared libraries">; +def no_color_diagnostics: 
F<"no-color-diagnostics">, + HelpText<"Do not use colors in diagnostics">; + def no_demangle: F<"no-demangle">, HelpText<"Do not demangle symbol names">; +def no_gc_sections: F<"no-gc-sections">, + HelpText<"Disable garbage collection of unused sections">; + def no_gnu_unique: F<"no-gnu-unique">, HelpText<"Disable STB_GNU_UNIQUE symbol binding">; +def no_threads: F<"no-threads">, + HelpText<"Do not run the linker multi-threaded">; + def no_whole_archive: F<"no-whole-archive">, HelpText<"Restores the default behavior of loading archive members">; def noinhibit_exec: F<"noinhibit-exec">, HelpText<"Retain the executable output file whenever it is still usable">; +def nopie: F<"nopie">, HelpText<"Do not create a position independent executable">; + +def no_rosegment: F<"no-rosegment">, HelpText<"Do not put read-only non-executable sections in their own segment">; + def no_undefined: F<"no-undefined">, HelpText<"Report unresolved symbols even if the linker is creating a shared library">; @@ -119,6 +161,12 @@ def no_undefined_version: F<"no-undefined-version">, def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, HelpText<"Path to file to write output">; +def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">, + HelpText<"Specify the binary format for the output object file">; + +def omagic: F<"omagic">, MetaVarName<"<magic>">, + HelpText<"Set the text and data sections to be readable and writable">; + def pie: F<"pie">, HelpText<"Create a position independent executable">; def print_gc_sections: F<"print-gc-sections">, @@ -131,12 +179,20 @@ def rpath: S<"rpath">, HelpText<"Add a DT_RUNPATH to the output">; def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; +def retain_symbols_file: J<"retain-symbols-file=">, MetaVarName<"<file>">, + HelpText<"Retain only the symbols listed in the file">; + def script: S<"script">, HelpText<"Read linker script">; +def section_start: S<"section-start">, MetaVarName<"<address>">, + 
HelpText<"Set address of section">; + def shared: F<"shared">, HelpText<"Build a shared object">; def soname: J<"soname=">, HelpText<"Set DT_SONAME">; +def sort_section: S<"sort-section">, HelpText<"Specifies sections sorting rule when linkerscript is used">; + def start_lib: F<"start-lib">, HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">; @@ -144,15 +200,24 @@ def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; +def symbol_ordering_file: S<"symbol-ordering-file">, + HelpText<"Layout sections in the order specified by symbol file">; + def sysroot: J<"sysroot=">, HelpText<"Set the system root">; -def threads: F<"threads">, HelpText<"Enable use of threads">; +def target1_rel: F<"target1-rel">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_REL32">; + +def target1_abs: F<"target1-abs">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_ABS32">; + +def target2: J<"target2=">, MetaVarName<"<type>">, HelpText<"Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel">; + +def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; def trace: F<"trace">, HelpText<"Print the names of the input files">; def trace_symbol : J<"trace-symbol=">, HelpText<"Trace references to symbols">; -def undefined: J<"undefined=">, +def undefined: S<"undefined">, HelpText<"Force undefined symbol during linking">; def unresolved_symbols: J<"unresolved-symbols=">, @@ -161,9 +226,11 @@ def unresolved_symbols: J<"unresolved-symbols=">, def rsp_quoting: J<"rsp-quoting=">, HelpText<"Quoting style for response files. 
Values supported: windows|posix">; +def v: Flag<["-"], "v">, HelpText<"Display the version number">; + def verbose: F<"verbose">, HelpText<"Verbose mode">; -def version: F<"version">, HelpText<"Display the version number">; +def version: F<"version">, HelpText<"Display the version number and exit">; def version_script: S<"version-script">, HelpText<"Read a version script">; @@ -181,6 +248,7 @@ def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">, HelpText<"Linker option extensions">; // Aliases +def alias_auxiliary: Separate<["-"], "f">, Alias<auxiliary>; def alias_Bdynamic_call_shared: F<"call_shared">, Alias<Bdynamic>; def alias_Bdynamic_dy: F<"dy">, Alias<Bdynamic>; def alias_Bstatic_dn: F<"dn">, Alias<Bstatic>; @@ -192,29 +260,41 @@ def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>; def alias_dynamic_list: J<"dynamic-list=">, Alias<dynamic_list>; def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; def alias_entry_entry: J<"entry=">, Alias<entry>; +def alias_error_limit: J<"error-limit=">, Alias<error_limit>; def alias_export_dynamic_E: Flag<["-"], "E">, Alias<export_dynamic>; def alias_export_dynamic_symbol: J<"export-dynamic-symbol=">, Alias<export_dynamic_symbol>; def alias_fini_fini: J<"fini=">, Alias<fini>; +def alias_format_b: S<"b">, Alias<format>; def alias_hash_style_hash_style: J<"hash-style=">, Alias<hash_style>; def alias_init_init: J<"init=">, Alias<init>; def alias_l__library: J<"library=">, Alias<l>; +def alias_omagic: Flag<["-"], "N">, Alias<omagic>; def alias_o_output: Joined<["--"], "output=">, Alias<o>; +def alias_o_output2 : Separate<["--"], "output">, Alias<o>; def alias_pie_pic_executable: F<"pic-executable">, Alias<pie>; def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>; -def alias_rpath_R: Joined<["-"], "R">, Alias<rpath>; +def alias_retain_symbols_file: S<"retain-symbols-file">, Alias<retain_symbols_file>; +def alias_rpath_R: JoinedOrSeparate<["-"], "R">, Alias<rpath>; def 
alias_rpath_rpath: J<"rpath=">, Alias<rpath>; def alias_script_T: JoinedOrSeparate<["-"], "T">, Alias<script>; def alias_shared_Bshareable: F<"Bshareable">, Alias<shared>; def alias_soname_h: JoinedOrSeparate<["-"], "h">, Alias<soname>; def alias_soname_soname: S<"soname">, Alias<soname>; +def alias_sort_section: J<"sort-section=">, Alias<sort_section>; +def alias_script: J<"script=">, Alias<script>; def alias_strip_all: Flag<["-"], "s">, Alias<strip_all>; def alias_strip_debug_S: Flag<["-"], "S">, Alias<strip_debug>; +def alias_Tbss: J<"Tbss=">, Alias<Tbss>; +def alias_Tdata: J<"Tdata=">, Alias<Tdata>; def alias_trace: Flag<["-"], "t">, Alias<trace>; def alias_trace_symbol_y : JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>; +def alias_Ttext: J<"Ttext=">, Alias<Ttext>; +def alias_Ttext_segment: S<"Ttext-segment">, Alias<Ttext>; +def alias_Ttext_segment_eq: J<"Ttext-segment=">, Alias<Ttext>; +def alias_undefined_eq: J<"undefined=">, Alias<undefined>; def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>; def alias_version_V: Flag<["-"], "V">, Alias<version>; -def alias_version_v: Flag<["-"], "v">, Alias<version>; def alias_wrap_wrap: J<"wrap=">, Alias<wrap>; // Our symbol resolution algorithm handles symbols in archive files differently @@ -239,9 +319,13 @@ def plugin_opt_eq: J<"plugin-opt=">; // Options listed below are silently ignored for now for compatibility. 
def allow_shlib_undefined: F<"allow-shlib-undefined">; +def cref: Flag<["--"], "cref">; def define_common: F<"define-common">; def demangle: F<"demangle">; def detect_odr_violations: F<"detect-odr-violations">; +def g: Flag<["-"], "g">; +def M: Flag<["-"], "M">; +def Map: JS<"Map">; def no_add_needed: F<"no-add-needed">; def no_allow_shlib_undefined: F<"no-allow-shlib-undefined">; def no_copy_dt_needed_entries: F<"no-copy-dt-needed-entries">, @@ -254,23 +338,30 @@ def no_warn_mismatch: F<"no-warn-mismatch">; def rpath_link: S<"rpath-link">; def rpath_link_eq: J<"rpath-link=">; def sort_common: F<"sort-common">; +def stats: F<"stats">; def warn_execstack: F<"warn-execstack">; def warn_shared_textrel: F<"warn-shared-textrel">; -def G: Separate<["-"], "G">; +def EB : F<"EB">; +def EL : F<"EL">; +def G: JoinedOrSeparate<["-"], "G">; +def Qy : F<"Qy">; // Aliases for ignored options def alias_define_common_d: Flag<["-"], "d">, Alias<define_common>; def alias_define_common_dc: F<"dc">, Alias<define_common>; def alias_define_common_dp: F<"dp">, Alias<define_common>; +def alias_Map_eq: J<"Map=">, Alias<Map>; def alias_version_script_version_script: J<"version-script=">, Alias<version_script>; // LTO-related options. -def lto_jobs: J<"lto-jobs=">, HelpText<"Number of threads to run codegen">; def lto_aa_pipeline: J<"lto-aa-pipeline=">, HelpText<"AA pipeline to run during LTO. 
Used in conjunction with -lto-newpm-passes">; def lto_newpm_passes: J<"lto-newpm-passes=">, HelpText<"Passes to run during LTO">; +def lto_partitions: J<"lto-partitions=">, + HelpText<"Number of LTO codegen partitions">; def disable_verify: F<"disable-verify">; def mllvm: S<"mllvm">; def save_temps: F<"save-temps">; +def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.cpp b/contrib/llvm/tools/lld/ELF/OutputSections.cpp index 50b94015f229..bf7f9c29a29a 100644 --- a/contrib/llvm/tools/lld/ELF/OutputSections.cpp +++ b/contrib/llvm/tools/lld/ELF/OutputSections.cpp @@ -11,15 +11,16 @@ #include "Config.h" #include "EhFrame.h" #include "LinkerScript.h" +#include "Memory.h" #include "Strings.h" #include "SymbolTable.h" +#include "SyntheticSections.h" #include "Target.h" -#include "lld/Core/Parallel.h" +#include "Threads.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SHA1.h" -#include <map> using namespace llvm; using namespace llvm::dwarf; @@ -30,855 +31,152 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -template <class ELFT> -OutputSectionBase<ELFT>::OutputSectionBase(StringRef Name, uint32_t Type, - uintX_t Flags) +OutputSectionBase::OutputSectionBase(StringRef Name, uint32_t Type, + uint64_t Flags) : Name(Name) { - memset(&Header, 0, sizeof(Elf_Shdr)); - Header.sh_type = Type; - Header.sh_flags = Flags; - Header.sh_addralign = 1; + this->Type = Type; + this->Flags = Flags; + this->Addralign = 1; } -template <class ELFT> -void OutputSectionBase<ELFT>::writeHeaderTo(Elf_Shdr *Shdr) { - *Shdr = Header; +uint32_t OutputSectionBase::getPhdrFlags() const { + uint32_t Ret = PF_R; + if (Flags & SHF_WRITE) + Ret |= PF_W; + if (Flags & SHF_EXECINSTR) + Ret |= PF_X; + return Ret; } template <class ELFT> -GotPltSection<ELFT>::GotPltSection() - : OutputSectionBase<ELFT>(".got.plt", SHT_PROGBITS, 
SHF_ALLOC | SHF_WRITE) { - this->Header.sh_addralign = Target->GotPltEntrySize; -} - -template <class ELFT> void GotPltSection<ELFT>::addEntry(SymbolBody &Sym) { - Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size(); - Entries.push_back(&Sym); -} - -template <class ELFT> bool GotPltSection<ELFT>::empty() const { - return Entries.empty(); -} - -template <class ELFT> void GotPltSection<ELFT>::finalize() { - this->Header.sh_size = (Target->GotPltHeaderEntriesNum + Entries.size()) * - Target->GotPltEntrySize; -} - -template <class ELFT> void GotPltSection<ELFT>::writeTo(uint8_t *Buf) { - Target->writeGotPltHeader(Buf); - Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize; - for (const SymbolBody *B : Entries) { - Target->writeGotPlt(Buf, *B); - Buf += sizeof(uintX_t); +void OutputSectionBase::writeHeaderTo(typename ELFT::Shdr *Shdr) { + Shdr->sh_entsize = Entsize; + Shdr->sh_addralign = Addralign; + Shdr->sh_type = Type; + Shdr->sh_offset = Offset; + Shdr->sh_flags = Flags; + Shdr->sh_info = Info; + Shdr->sh_link = Link; + Shdr->sh_addr = Addr; + Shdr->sh_size = Size; + Shdr->sh_name = ShName; +} + +template <class ELFT> static uint64_t getEntsize(uint32_t Type) { + switch (Type) { + case SHT_RELA: + return sizeof(typename ELFT::Rela); + case SHT_REL: + return sizeof(typename ELFT::Rel); + case SHT_MIPS_REGINFO: + return sizeof(Elf_Mips_RegInfo<ELFT>); + case SHT_MIPS_OPTIONS: + return sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>); + case SHT_MIPS_ABIFLAGS: + return sizeof(Elf_Mips_ABIFlags<ELFT>); + default: + return 0; } } template <class ELFT> -GotSection<ELFT>::GotSection() - : OutputSectionBase<ELFT>(".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE) { - if (Config->EMachine == EM_MIPS) - this->Header.sh_flags |= SHF_MIPS_GPREL; - this->Header.sh_addralign = Target->GotEntrySize; -} - -template <class ELFT> -void GotSection<ELFT>::addEntry(SymbolBody &Sym) { - Sym.GotIndex = Entries.size(); - Entries.push_back(&Sym); -} - 
-template <class ELFT> -void GotSection<ELFT>::addMipsEntry(SymbolBody &Sym, uintX_t Addend, - RelExpr Expr) { - // For "true" local symbols which can be referenced from the same module - // only compiler creates two instructions for address loading: - // - // lw $8, 0($gp) # R_MIPS_GOT16 - // addi $8, $8, 0 # R_MIPS_LO16 - // - // The first instruction loads high 16 bits of the symbol address while - // the second adds an offset. That allows to reduce number of required - // GOT entries because only one global offset table entry is necessary - // for every 64 KBytes of local data. So for local symbols we need to - // allocate number of GOT entries to hold all required "page" addresses. - // - // All global symbols (hidden and regular) considered by compiler uniformly. - // It always generates a single `lw` instruction and R_MIPS_GOT16 relocation - // to load address of the symbol. So for each such symbol we need to - // allocate dedicated GOT entry to store its address. - // - // If a symbol is preemptible we need help of dynamic linker to get its - // final address. The corresponding GOT entries are allocated in the - // "global" part of GOT. Entries for non preemptible global symbol allocated - // in the "local" part of GOT. - // - // See "Global Offset Table" in Chapter 5: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - if (Expr == R_MIPS_GOT_LOCAL_PAGE) { - // At this point we do not know final symbol value so to reduce number - // of allocated GOT entries do the following trick. Save all output - // sections referenced by GOT relocations. Then later in the `finalize` - // method calculate number of "pages" required to cover all saved output - // section and allocate appropriate number of GOT entries. - auto *OutSec = cast<DefinedRegular<ELFT>>(&Sym)->Section->OutSec; - MipsOutSections.insert(OutSec); - return; - } - if (Sym.isTls()) { - // GOT entries created for MIPS TLS relocations behave like - // almost GOT entries from other ABIs. 
They go to the end - // of the global offset table. - Sym.GotIndex = Entries.size(); - Entries.push_back(&Sym); - return; - } - auto AddEntry = [&](SymbolBody &S, uintX_t A, MipsGotEntries &Items) { - if (S.isInGot() && !A) - return; - size_t NewIndex = Items.size(); - if (!MipsGotMap.insert({{&S, A}, NewIndex}).second) - return; - Items.emplace_back(&S, A); - if (!A) - S.GotIndex = NewIndex; - }; - if (Sym.isPreemptible()) { - // Ignore addends for preemptible symbols. They got single GOT entry anyway. - AddEntry(Sym, 0, MipsGlobal); - Sym.IsInGlobalMipsGot = true; - } else - AddEntry(Sym, Addend, MipsLocal); -} - -template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { - if (Sym.GlobalDynIndex != -1U) - return false; - Sym.GlobalDynIndex = Entries.size(); - // Global Dynamic TLS entries take two GOT slots. - Entries.push_back(nullptr); - Entries.push_back(&Sym); - return true; +OutputSection<ELFT>::OutputSection(StringRef Name, uint32_t Type, uintX_t Flags) + : OutputSectionBase(Name, Type, Flags) { + this->Entsize = getEntsize<ELFT>(Type); } -// Reserves TLS entries for a TLS module ID and a TLS block offset. -// In total it takes two GOT slots. -template <class ELFT> bool GotSection<ELFT>::addTlsIndex() { - if (TlsIndexOff != uint32_t(-1)) +template <typename ELFT> +static bool compareByFilePosition(InputSection<ELFT> *A, + InputSection<ELFT> *B) { + // Synthetic doesn't have link order dependency, stable_sort will keep it last + if (A->kind() == InputSectionData::Synthetic || + B->kind() == InputSectionData::Synthetic) return false; - TlsIndexOff = Entries.size() * sizeof(uintX_t); - Entries.push_back(nullptr); - Entries.push_back(nullptr); - return true; -} - -template <class ELFT> -typename GotSection<ELFT>::uintX_t -GotSection<ELFT>::getMipsLocalPageOffset(uintX_t EntryValue) { - // Initialize the entry by the %hi(EntryValue) expression - // but without right-shifting. 
- EntryValue = (EntryValue + 0x8000) & ~0xffff; - // Take into account MIPS GOT header. - // See comment in the GotSection::writeTo. - size_t NewIndex = MipsLocalGotPos.size() + 2; - auto P = MipsLocalGotPos.insert(std::make_pair(EntryValue, NewIndex)); - assert(!P.second || MipsLocalGotPos.size() <= MipsPageEntries); - return (uintX_t)P.first->second * sizeof(uintX_t) - MipsGPOffset; -} - -template <class ELFT> -typename GotSection<ELFT>::uintX_t -GotSection<ELFT>::getMipsGotOffset(const SymbolBody &B, uintX_t Addend) const { - uintX_t Off = MipsPageEntries; - if (B.isTls()) - Off += MipsLocal.size() + MipsGlobal.size() + B.GotIndex; - else if (B.IsInGlobalMipsGot) - Off += MipsLocal.size() + B.GotIndex; - else if (B.isInGot()) - Off += B.GotIndex; - else { - auto It = MipsGotMap.find({&B, Addend}); - assert(It != MipsGotMap.end()); - Off += It->second; - } - return Off * sizeof(uintX_t) - MipsGPOffset; -} - -template <class ELFT> -typename GotSection<ELFT>::uintX_t GotSection<ELFT>::getMipsTlsOffset() { - return (MipsPageEntries + MipsLocal.size() + MipsGlobal.size()) * - sizeof(uintX_t); -} - -template <class ELFT> -typename GotSection<ELFT>::uintX_t -GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const { - return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t); -} - -template <class ELFT> -typename GotSection<ELFT>::uintX_t -GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { - return B.GlobalDynIndex * sizeof(uintX_t); -} - -template <class ELFT> -const SymbolBody *GotSection<ELFT>::getMipsFirstGlobalEntry() const { - return MipsGlobal.empty() ? 
nullptr : MipsGlobal.front().first; -} - -template <class ELFT> -unsigned GotSection<ELFT>::getMipsLocalEntriesNum() const { - return MipsPageEntries + MipsLocal.size(); + auto *LA = cast<InputSection<ELFT>>(A->getLinkOrderDep()); + auto *LB = cast<InputSection<ELFT>>(B->getLinkOrderDep()); + OutputSectionBase *AOut = LA->OutSec; + OutputSectionBase *BOut = LB->OutSec; + if (AOut != BOut) + return AOut->SectionIndex < BOut->SectionIndex; + return LA->OutSecOff < LB->OutSecOff; } -template <class ELFT> void GotSection<ELFT>::finalize() { - size_t EntriesNum = Entries.size(); - if (Config->EMachine == EM_MIPS) { - // Take into account MIPS GOT header. - // See comment in the GotSection::writeTo. - MipsPageEntries += 2; - for (const OutputSectionBase<ELFT> *OutSec : MipsOutSections) { - // Calculate an upper bound of MIPS GOT entries required to store page - // addresses of local symbols. We assume the worst case - each 64kb - // page of the output section has at least one GOT relocation against it. - // Add 0x8000 to the section's size because the page address stored - // in the GOT entry is calculated as (value + 0x8000) & ~0xffff. - MipsPageEntries += (OutSec->getSize() + 0x8000 + 0xfffe) / 0xffff; - } - EntriesNum += MipsPageEntries + MipsLocal.size() + MipsGlobal.size(); - } - this->Header.sh_size = EntriesNum * sizeof(uintX_t); -} - -template <class ELFT> void GotSection<ELFT>::writeMipsGot(uint8_t *&Buf) { - // Set the MSB of the second GOT slot. This is not required by any - // MIPS ABI documentation, though. - // - // There is a comment in glibc saying that "The MSB of got[1] of a - // gnu object is set to identify gnu objects," and in GNU gold it - // says "the second entry will be used by some runtime loaders". - // But how this field is being used is unclear. 
- // - // We are not really willing to mimic other linkers behaviors - // without understanding why they do that, but because all files - // generated by GNU tools have this special GOT value, and because - // we've been doing this for years, it is probably a safe bet to - // keep doing this for now. We really need to revisit this to see - // if we had to do this. - auto *P = reinterpret_cast<typename ELFT::Off *>(Buf); - P[1] = uintX_t(1) << (ELFT::Is64Bits ? 63 : 31); - // Write 'page address' entries to the local part of the GOT. - for (std::pair<uintX_t, size_t> &L : MipsLocalGotPos) { - uint8_t *Entry = Buf + L.second * sizeof(uintX_t); - write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Entry, L.first); - } - Buf += MipsPageEntries * sizeof(uintX_t); - auto AddEntry = [&](const MipsGotEntry &SA) { - uint8_t *Entry = Buf; - Buf += sizeof(uintX_t); - const SymbolBody* Body = SA.first; - uintX_t VA = Body->template getVA<ELFT>(SA.second); - write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Entry, VA); - }; - std::for_each(std::begin(MipsLocal), std::end(MipsLocal), AddEntry); - std::for_each(std::begin(MipsGlobal), std::end(MipsGlobal), AddEntry); -} - -template <class ELFT> void GotSection<ELFT>::writeTo(uint8_t *Buf) { - if (Config->EMachine == EM_MIPS) - writeMipsGot(Buf); - for (const SymbolBody *B : Entries) { - uint8_t *Entry = Buf; - Buf += sizeof(uintX_t); - if (!B) - continue; - if (B->isPreemptible()) - continue; // The dynamic linker will take care of it. - uintX_t VA = B->getVA<ELFT>(); - write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Entry, VA); - } -} - -template <class ELFT> -PltSection<ELFT>::PltSection() - : OutputSectionBase<ELFT>(".plt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR) { - this->Header.sh_addralign = 16; -} - -template <class ELFT> void PltSection<ELFT>::writeTo(uint8_t *Buf) { - // At beginning of PLT, we have code to call the dynamic linker - // to resolve dynsyms at runtime. Write such code. 
- Target->writePltHeader(Buf); - size_t Off = Target->PltHeaderSize; - - for (auto &I : Entries) { - const SymbolBody *B = I.first; - unsigned RelOff = I.second; - uint64_t Got = B->getGotPltVA<ELFT>(); - uint64_t Plt = this->getVA() + Off; - Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); - Off += Target->PltEntrySize; - } -} - -template <class ELFT> void PltSection<ELFT>::addEntry(SymbolBody &Sym) { - Sym.PltIndex = Entries.size(); - unsigned RelOff = Out<ELFT>::RelaPlt->getRelocOffset(); - Entries.push_back(std::make_pair(&Sym, RelOff)); -} - -template <class ELFT> void PltSection<ELFT>::finalize() { - this->Header.sh_size = - Target->PltHeaderSize + Entries.size() * Target->PltEntrySize; -} - -template <class ELFT> -RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort) - : OutputSectionBase<ELFT>(Name, Config->Rela ? SHT_RELA : SHT_REL, - SHF_ALLOC), - Sort(Sort) { - this->Header.sh_entsize = Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); - this->Header.sh_addralign = sizeof(uintX_t); -} - -template <class ELFT> -void RelocationSection<ELFT>::addReloc(const DynamicReloc<ELFT> &Reloc) { - Relocs.push_back(Reloc); -} - -template <class ELFT, class RelTy> -static bool compRelocations(const RelTy &A, const RelTy &B) { - return A.getSymbol(Config->Mips64EL) < B.getSymbol(Config->Mips64EL); -} - -template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) { - uint8_t *BufBegin = Buf; - for (const DynamicReloc<ELFT> &Rel : Relocs) { - auto *P = reinterpret_cast<Elf_Rela *>(Buf); - Buf += Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); - - if (Config->Rela) - P->r_addend = Rel.getAddend(); - P->r_offset = Rel.getOffset(); - if (Config->EMachine == EM_MIPS && Rel.getOutputSec() == Out<ELFT>::Got) - // Dynamic relocation against MIPS GOT section make deal TLS entries - // allocated in the end of the GOT. We need to adjust the offset to take - // in account 'local' and 'global' GOT entries. 
- P->r_offset += Out<ELFT>::Got->getMipsTlsOffset(); - P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->Mips64EL); - } - - if (Sort) { - if (Config->Rela) - std::stable_sort((Elf_Rela *)BufBegin, - (Elf_Rela *)BufBegin + Relocs.size(), - compRelocations<ELFT, Elf_Rela>); - else - std::stable_sort((Elf_Rel *)BufBegin, (Elf_Rel *)BufBegin + Relocs.size(), - compRelocations<ELFT, Elf_Rel>); - } -} - -template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() { - return this->Header.sh_entsize * Relocs.size(); -} - -template <class ELFT> void RelocationSection<ELFT>::finalize() { - this->Header.sh_link = Static ? Out<ELFT>::SymTab->SectionIndex - : Out<ELFT>::DynSymTab->SectionIndex; - this->Header.sh_size = Relocs.size() * this->Header.sh_entsize; -} - -template <class ELFT> -InterpSection<ELFT>::InterpSection() - : OutputSectionBase<ELFT>(".interp", SHT_PROGBITS, SHF_ALLOC) { - this->Header.sh_size = Config->DynamicLinker.size() + 1; -} - -template <class ELFT> void InterpSection<ELFT>::writeTo(uint8_t *Buf) { - StringRef S = Config->DynamicLinker; - memcpy(Buf, S.data(), S.size()); -} - -template <class ELFT> -HashTableSection<ELFT>::HashTableSection() - : OutputSectionBase<ELFT>(".hash", SHT_HASH, SHF_ALLOC) { - this->Header.sh_entsize = sizeof(Elf_Word); - this->Header.sh_addralign = sizeof(Elf_Word); -} - -static uint32_t hashSysv(StringRef Name) { - uint32_t H = 0; - for (char C : Name) { - H = (H << 4) + C; - uint32_t G = H & 0xf0000000; - if (G) - H ^= G >> 24; - H &= ~G; - } - return H; -} - -template <class ELFT> void HashTableSection<ELFT>::finalize() { - this->Header.sh_link = Out<ELFT>::DynSymTab->SectionIndex; - - unsigned NumEntries = 2; // nbucket and nchain. - NumEntries += Out<ELFT>::DynSymTab->getNumSymbols(); // The chain entries. - - // Create as many buckets as there are symbols. - // FIXME: This is simplistic. We can try to optimize it, but implementing - // support for SHT_GNU_HASH is probably even more profitable. 
- NumEntries += Out<ELFT>::DynSymTab->getNumSymbols(); - this->Header.sh_size = NumEntries * sizeof(Elf_Word); -} - -template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) { - unsigned NumSymbols = Out<ELFT>::DynSymTab->getNumSymbols(); - auto *P = reinterpret_cast<Elf_Word *>(Buf); - *P++ = NumSymbols; // nbucket - *P++ = NumSymbols; // nchain - - Elf_Word *Buckets = P; - Elf_Word *Chains = P + NumSymbols; - - for (const std::pair<SymbolBody *, unsigned> &P : - Out<ELFT>::DynSymTab->getSymbols()) { - SymbolBody *Body = P.first; - StringRef Name = Body->getName(); - unsigned I = Body->DynsymIndex; - uint32_t Hash = hashSysv(Name) % NumSymbols; - Chains[I] = Buckets[Hash]; - Buckets[Hash] = I; - } -} - -static uint32_t hashGnu(StringRef Name) { - uint32_t H = 5381; - for (uint8_t C : Name) - H = (H << 5) + H + C; - return H; -} - -template <class ELFT> -GnuHashTableSection<ELFT>::GnuHashTableSection() - : OutputSectionBase<ELFT>(".gnu.hash", SHT_GNU_HASH, SHF_ALLOC) { - this->Header.sh_entsize = ELFT::Is64Bits ? 0 : 4; - this->Header.sh_addralign = sizeof(uintX_t); -} - -template <class ELFT> -unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) { - if (!NumHashed) - return 0; - - // These values are prime numbers which are not greater than 2^(N-1) + 1. - // In result, for any particular NumHashed we return a prime number - // which is not greater than NumHashed. - static const unsigned Primes[] = { - 1, 1, 3, 3, 7, 13, 31, 61, 127, 251, - 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071}; - - return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed), - array_lengthof(Primes) - 1)]; -} - -// Bloom filter estimation: at least 8 bits for each hashed symbol. -// GNU Hash table requirement: it should be a power of 2, -// the minimum value is 1, even for an empty table. 
-// Expected results for a 32-bit target: -// calcMaskWords(0..4) = 1 -// calcMaskWords(5..8) = 2 -// calcMaskWords(9..16) = 4 -// For a 64-bit target: -// calcMaskWords(0..8) = 1 -// calcMaskWords(9..16) = 2 -// calcMaskWords(17..32) = 4 -template <class ELFT> -unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) { - if (!NumHashed) - return 1; - return NextPowerOf2((NumHashed - 1) / sizeof(Elf_Off)); -} - -template <class ELFT> void GnuHashTableSection<ELFT>::finalize() { - unsigned NumHashed = Symbols.size(); - NBuckets = calcNBuckets(NumHashed); - MaskWords = calcMaskWords(NumHashed); - // Second hash shift estimation: just predefined values. - Shift2 = ELFT::Is64Bits ? 6 : 5; - - this->Header.sh_link = Out<ELFT>::DynSymTab->SectionIndex; - this->Header.sh_size = sizeof(Elf_Word) * 4 // Header - + sizeof(Elf_Off) * MaskWords // Bloom Filter - + sizeof(Elf_Word) * NBuckets // Hash Buckets - + sizeof(Elf_Word) * NumHashed; // Hash Values -} - -template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) { - writeHeader(Buf); - if (Symbols.empty()) - return; - writeBloomFilter(Buf); - writeHashTable(Buf); -} - -template <class ELFT> -void GnuHashTableSection<ELFT>::writeHeader(uint8_t *&Buf) { - auto *P = reinterpret_cast<Elf_Word *>(Buf); - *P++ = NBuckets; - *P++ = Out<ELFT>::DynSymTab->getNumSymbols() - Symbols.size(); - *P++ = MaskWords; - *P++ = Shift2; - Buf = reinterpret_cast<uint8_t *>(P); -} - -template <class ELFT> -void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *&Buf) { - unsigned C = sizeof(Elf_Off) * 8; - - auto *Masks = reinterpret_cast<Elf_Off *>(Buf); - for (const SymbolData &Sym : Symbols) { - size_t Pos = (Sym.Hash / C) & (MaskWords - 1); - uintX_t V = (uintX_t(1) << (Sym.Hash % C)) | - (uintX_t(1) << ((Sym.Hash >> Shift2) % C)); - Masks[Pos] |= V; - } - Buf += sizeof(Elf_Off) * MaskWords; -} +template <class ELFT> void OutputSection<ELFT>::finalize() { + if ((this->Flags & SHF_LINK_ORDER) && 
!this->Sections.empty()) { + std::sort(Sections.begin(), Sections.end(), compareByFilePosition<ELFT>); + Size = 0; + assignOffsets(); -template <class ELFT> -void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) { - Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf); - Elf_Word *Values = Buckets + NBuckets; - - int PrevBucket = -1; - int I = 0; - for (const SymbolData &Sym : Symbols) { - int Bucket = Sym.Hash % NBuckets; - assert(PrevBucket <= Bucket); - if (Bucket != PrevBucket) { - Buckets[Bucket] = Sym.Body->DynsymIndex; - PrevBucket = Bucket; - if (I > 0) - Values[I - 1] |= 1; - } - Values[I] = Sym.Hash & ~1; - ++I; + // We must preserve the link order dependency of sections with the + // SHF_LINK_ORDER flag. The dependency is indicated by the sh_link field. We + // need to translate the InputSection sh_link to the OutputSection sh_link, + // all InputSections in the OutputSection have the same dependency. + if (auto *D = this->Sections.front()->getLinkOrderDep()) + this->Link = D->OutSec->SectionIndex; } - if (I > 0) - Values[I - 1] |= 1; -} -// Add symbols to this symbol hash table. Note that this function -// destructively sort a given vector -- which is needed because -// GNU-style hash table places some sorting requirements. -template <class ELFT> -void GnuHashTableSection<ELFT>::addSymbols( - std::vector<std::pair<SymbolBody *, size_t>> &V) { - // Ideally this will just be 'auto' but GCC 6.1 is not able - // to deduce it correctly. 
- std::vector<std::pair<SymbolBody *, size_t>>::iterator Mid = - std::stable_partition(V.begin(), V.end(), - [](std::pair<SymbolBody *, size_t> &P) { - return P.first->isUndefined(); - }); - if (Mid == V.end()) + uint32_t Type = this->Type; + if (!Config->Relocatable || (Type != SHT_RELA && Type != SHT_REL)) return; - for (auto I = Mid, E = V.end(); I != E; ++I) { - SymbolBody *B = I->first; - size_t StrOff = I->second; - Symbols.push_back({B, StrOff, hashGnu(B->getName())}); - } - - unsigned NBuckets = calcNBuckets(Symbols.size()); - std::stable_sort(Symbols.begin(), Symbols.end(), - [&](const SymbolData &L, const SymbolData &R) { - return L.Hash % NBuckets < R.Hash % NBuckets; - }); - - V.erase(Mid, V.end()); - for (const SymbolData &Sym : Symbols) - V.push_back({Sym.Body, Sym.STName}); -} - -// Returns the number of version definition entries. Because the first entry -// is for the version definition itself, it is the number of versioned symbols -// plus one. Note that we don't support multiple versions yet. -static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; } - -template <class ELFT> -DynamicSection<ELFT>::DynamicSection() - : OutputSectionBase<ELFT>(".dynamic", SHT_DYNAMIC, SHF_ALLOC | SHF_WRITE) { - Elf_Shdr &Header = this->Header; - Header.sh_addralign = sizeof(uintX_t); - Header.sh_entsize = ELFT::Is64Bits ? 16 : 8; - - // .dynamic section is not writable on MIPS. - // See "Special Section" in Chapter 4 in the following document: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - if (Config->EMachine == EM_MIPS) - Header.sh_flags = SHF_ALLOC; -} - -template <class ELFT> void DynamicSection<ELFT>::finalize() { - if (this->Header.sh_size) - return; // Already finalized. - - Elf_Shdr &Header = this->Header; - Header.sh_link = Out<ELFT>::DynStrTab->SectionIndex; - - auto Add = [=](Entry E) { Entries.push_back(E); }; - - // Add strings. 
We know that these are the last strings to be added to - // DynStrTab and doing this here allows this function to set DT_STRSZ. - if (!Config->RPath.empty()) - Add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH, - Out<ELFT>::DynStrTab->addString(Config->RPath)}); - for (const std::unique_ptr<SharedFile<ELFT>> &F : - Symtab<ELFT>::X->getSharedFiles()) - if (F->isNeeded()) - Add({DT_NEEDED, Out<ELFT>::DynStrTab->addString(F->getSoName())}); - if (!Config->SoName.empty()) - Add({DT_SONAME, Out<ELFT>::DynStrTab->addString(Config->SoName)}); - - Out<ELFT>::DynStrTab->finalize(); - - if (Out<ELFT>::RelaDyn->hasRelocs()) { - bool IsRela = Config->Rela; - Add({IsRela ? DT_RELA : DT_REL, Out<ELFT>::RelaDyn}); - Add({IsRela ? DT_RELASZ : DT_RELSZ, Out<ELFT>::RelaDyn->getSize()}); - Add({IsRela ? DT_RELAENT : DT_RELENT, - uintX_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))}); - } - if (Out<ELFT>::RelaPlt && Out<ELFT>::RelaPlt->hasRelocs()) { - Add({DT_JMPREL, Out<ELFT>::RelaPlt}); - Add({DT_PLTRELSZ, Out<ELFT>::RelaPlt->getSize()}); - Add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT, - Out<ELFT>::GotPlt}); - Add({DT_PLTREL, uint64_t(Config->Rela ? 
DT_RELA : DT_REL)}); - } - Add({DT_SYMTAB, Out<ELFT>::DynSymTab}); - Add({DT_SYMENT, sizeof(Elf_Sym)}); - Add({DT_STRTAB, Out<ELFT>::DynStrTab}); - Add({DT_STRSZ, Out<ELFT>::DynStrTab->getSize()}); - if (Out<ELFT>::GnuHashTab) - Add({DT_GNU_HASH, Out<ELFT>::GnuHashTab}); - if (Out<ELFT>::HashTab) - Add({DT_HASH, Out<ELFT>::HashTab}); - - if (PreInitArraySec) { - Add({DT_PREINIT_ARRAY, PreInitArraySec}); - Add({DT_PREINIT_ARRAYSZ, PreInitArraySec->getSize()}); - } - if (InitArraySec) { - Add({DT_INIT_ARRAY, InitArraySec}); - Add({DT_INIT_ARRAYSZ, (uintX_t)InitArraySec->getSize()}); - } - if (FiniArraySec) { - Add({DT_FINI_ARRAY, FiniArraySec}); - Add({DT_FINI_ARRAYSZ, (uintX_t)FiniArraySec->getSize()}); - } - - if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Init)) - Add({DT_INIT, B}); - if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Fini)) - Add({DT_FINI, B}); - - uint32_t DtFlags = 0; - uint32_t DtFlags1 = 0; - if (Config->Bsymbolic) - DtFlags |= DF_SYMBOLIC; - if (Config->ZNodelete) - DtFlags1 |= DF_1_NODELETE; - if (Config->ZNow) { - DtFlags |= DF_BIND_NOW; - DtFlags1 |= DF_1_NOW; - } - if (Config->ZOrigin) { - DtFlags |= DF_ORIGIN; - DtFlags1 |= DF_1_ORIGIN; - } - - if (DtFlags) - Add({DT_FLAGS, DtFlags}); - if (DtFlags1) - Add({DT_FLAGS_1, DtFlags1}); - - if (!Config->Entry.empty()) - Add({DT_DEBUG, (uint64_t)0}); - - bool HasVerNeed = Out<ELFT>::VerNeed->getNeedNum() != 0; - if (HasVerNeed || Out<ELFT>::VerDef) - Add({DT_VERSYM, Out<ELFT>::VerSym}); - if (Out<ELFT>::VerDef) { - Add({DT_VERDEF, Out<ELFT>::VerDef}); - Add({DT_VERDEFNUM, getVerDefNum()}); - } - if (HasVerNeed) { - Add({DT_VERNEED, Out<ELFT>::VerNeed}); - Add({DT_VERNEEDNUM, Out<ELFT>::VerNeed->getNeedNum()}); - } - - if (Config->EMachine == EM_MIPS) { - Add({DT_MIPS_RLD_VERSION, 1}); - Add({DT_MIPS_FLAGS, RHF_NOTPOT}); - Add({DT_MIPS_BASE_ADDRESS, Config->ImageBase}); - Add({DT_MIPS_SYMTABNO, Out<ELFT>::DynSymTab->getNumSymbols()}); - Add({DT_MIPS_LOCAL_GOTNO, 
Out<ELFT>::Got->getMipsLocalEntriesNum()}); - if (const SymbolBody *B = Out<ELFT>::Got->getMipsFirstGlobalEntry()) - Add({DT_MIPS_GOTSYM, B->DynsymIndex}); - else - Add({DT_MIPS_GOTSYM, Out<ELFT>::DynSymTab->getNumSymbols()}); - Add({DT_PLTGOT, Out<ELFT>::Got}); - if (Out<ELFT>::MipsRldMap) - Add({DT_MIPS_RLD_MAP, Out<ELFT>::MipsRldMap}); - } - - // +1 for DT_NULL - Header.sh_size = (Entries.size() + 1) * Header.sh_entsize; -} - -template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { - auto *P = reinterpret_cast<Elf_Dyn *>(Buf); - - for (const Entry &E : Entries) { - P->d_tag = E.Tag; - switch (E.Kind) { - case Entry::SecAddr: - P->d_un.d_ptr = E.OutSec->getVA(); - break; - case Entry::SymAddr: - P->d_un.d_ptr = E.Sym->template getVA<ELFT>(); - break; - case Entry::PlainInt: - P->d_un.d_val = E.Val; - break; - } - ++P; - } -} - -template <class ELFT> -EhFrameHeader<ELFT>::EhFrameHeader() - : OutputSectionBase<ELFT>(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC) {} - -// .eh_frame_hdr contains a binary search table of pointers to FDEs. -// Each entry of the search table consists of two values, -// the starting PC from where FDEs covers, and the FDE's address. -// It is sorted by PC. -template <class ELFT> void EhFrameHeader<ELFT>::writeTo(uint8_t *Buf) { - const endianness E = ELFT::TargetEndianness; - - // Sort the FDE list by their PC and uniqueify. Usually there is only - // one FDE for a PC (i.e. function), but if ICF merges two functions - // into one, there can be more than one FDEs pointing to the address. 
- auto Less = [](const FdeData &A, const FdeData &B) { return A.Pc < B.Pc; }; - std::stable_sort(Fdes.begin(), Fdes.end(), Less); - auto Eq = [](const FdeData &A, const FdeData &B) { return A.Pc == B.Pc; }; - Fdes.erase(std::unique(Fdes.begin(), Fdes.end(), Eq), Fdes.end()); - - Buf[0] = 1; - Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; - Buf[2] = DW_EH_PE_udata4; - Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; - write32<E>(Buf + 4, Out<ELFT>::EhFrame->getVA() - this->getVA() - 4); - write32<E>(Buf + 8, Fdes.size()); - Buf += 12; - - uintX_t VA = this->getVA(); - for (FdeData &Fde : Fdes) { - write32<E>(Buf, Fde.Pc - VA); - write32<E>(Buf + 4, Fde.FdeVA - VA); - Buf += 8; - } -} - -template <class ELFT> void EhFrameHeader<ELFT>::finalize() { - // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. - this->Header.sh_size = 12 + Out<ELFT>::EhFrame->NumFdes * 8; -} - -template <class ELFT> -void EhFrameHeader<ELFT>::addFde(uint32_t Pc, uint32_t FdeVA) { - Fdes.push_back({Pc, FdeVA}); -} - -template <class ELFT> -OutputSection<ELFT>::OutputSection(StringRef Name, uint32_t Type, uintX_t Flags) - : OutputSectionBase<ELFT>(Name, Type, Flags) { - if (Type == SHT_RELA) - this->Header.sh_entsize = sizeof(Elf_Rela); - else if (Type == SHT_REL) - this->Header.sh_entsize = sizeof(Elf_Rel); -} - -template <class ELFT> void OutputSection<ELFT>::finalize() { - uint32_t Type = this->Header.sh_type; - if (Type != SHT_RELA && Type != SHT_REL) - return; - this->Header.sh_link = Out<ELFT>::SymTab->SectionIndex; + this->Link = In<ELFT>::SymTab->OutSec->SectionIndex; // sh_info for SHT_REL[A] sections should contain the section header index of // the section to which the relocation applies. 
InputSectionBase<ELFT> *S = Sections[0]->getRelocatedSection(); - this->Header.sh_info = S->OutSec->SectionIndex; + this->Info = S->OutSec->SectionIndex; } template <class ELFT> -void OutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { +void OutputSection<ELFT>::addSection(InputSectionData *C) { assert(C->Live); auto *S = cast<InputSection<ELFT>>(C); Sections.push_back(S); S->OutSec = this; this->updateAlignment(S->Alignment); -} - -// If an input string is in the form of "foo.N" where N is a number, -// return N. Otherwise, returns 65536, which is one greater than the -// lowest priority. -static int getPriority(StringRef S) { - size_t Pos = S.rfind('.'); - if (Pos == StringRef::npos) - return 65536; - int V; - if (S.substr(Pos + 1).getAsInteger(10, V)) - return 65536; - return V; + // Keep sh_entsize value of the input section to be able to perform merging + // later during a final linking using the generated relocatable object. + if (Config->Relocatable && (S->Flags & SHF_MERGE)) + this->Entsize = S->Entsize; } // This function is called after we sort input sections // and scan relocations to setup sections' offsets. template <class ELFT> void OutputSection<ELFT>::assignOffsets() { - uintX_t Off = this->Header.sh_size; + uintX_t Off = this->Size; for (InputSection<ELFT> *S : Sections) { Off = alignTo(Off, S->Alignment); S->OutSecOff = Off; Off += S->getSize(); } - this->Header.sh_size = Off; + this->Size = Off; } -// Sorts input sections by section name suffixes, so that .foo.N comes -// before .foo.M if N < M. Used to sort .{init,fini}_array.N sections. -// We want to keep the original order if the priorities are the same -// because the compiler keeps the original initialization order in a -// translation unit and we need to respect that. -// For more detail, read the section of the GCC's manual about init_priority. -template <class ELFT> void OutputSection<ELFT>::sortInitFini() { - // Sort sections by priority. 
- typedef std::pair<int, InputSection<ELFT> *> Pair; +template <class ELFT> +void OutputSection<ELFT>::sort( + std::function<int(InputSection<ELFT> *S)> Order) { + typedef std::pair<unsigned, InputSection<ELFT> *> Pair; auto Comp = [](const Pair &A, const Pair &B) { return A.first < B.first; }; std::vector<Pair> V; for (InputSection<ELFT> *S : Sections) - V.push_back({getPriority(S->getSectionName()), S}); + V.push_back({Order(S), S}); std::stable_sort(V.begin(), V.end(), Comp); Sections.clear(); for (Pair &P : V) Sections.push_back(P.second); } +// Sorts input sections by section name suffixes, so that .foo.N comes +// before .foo.M if N < M. Used to sort .{init,fini}_array.N sections. +// We want to keep the original order if the priorities are the same +// because the compiler keeps the original initialization order in a +// translation unit and we need to respect that. +// For more detail, read the section of the GCC's manual about init_priority. +template <class ELFT> void OutputSection<ELFT>::sortInitFini() { + // Sort sections by priority. + sort([](InputSection<ELFT> *S) { return getPriority(S->Name); }); +} + // Returns true if S matches /Filename.?\.o$/. static bool isCrtBeginEnd(StringRef S, StringRef Filename) { if (!S.endswith(".o")) @@ -921,8 +219,8 @@ static bool compCtors(const InputSection<ELFT> *A, bool EndB = isCrtend(B->getFile()->getName()); if (EndA != EndB) return EndB; - StringRef X = A->getSectionName(); - StringRef Y = B->getSectionName(); + StringRef X = A->Name; + StringRef Y = B->Name; assert(X.startswith(".ctors") || X.startswith(".dtors")); assert(Y.startswith(".ctors") || Y.startswith(".dtors")); X = X.substr(6); @@ -939,65 +237,50 @@ template <class ELFT> void OutputSection<ELFT>::sortCtorsDtors() { std::stable_sort(Sections.begin(), Sections.end(), compCtors<ELFT>); } -static void fill(uint8_t *Buf, size_t Size, ArrayRef<uint8_t> A) { +// Fill [Buf, Buf + Size) with Filler. Filler is written in big +// endian order. 
This is used for linker script "=fillexp" command. +void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { + uint8_t V[4]; + write32be(V, Filler); size_t I = 0; - for (; I + A.size() < Size; I += A.size()) - memcpy(Buf + I, A.data(), A.size()); - memcpy(Buf + I, A.data(), Size - I); + for (; I + 4 < Size; I += 4) + memcpy(Buf + I, V, 4); + memcpy(Buf + I, V, Size - I); } template <class ELFT> void OutputSection<ELFT>::writeTo(uint8_t *Buf) { - ArrayRef<uint8_t> Filler = Script<ELFT>::X->getFiller(this->Name); - if (!Filler.empty()) - fill(Buf, this->getSize(), Filler); - if (Config->Threads) { - parallel_for_each(Sections.begin(), Sections.end(), - [=](InputSection<ELFT> *C) { C->writeTo(Buf); }); - } else { - for (InputSection<ELFT> *C : Sections) - C->writeTo(Buf); - } + Loc = Buf; + if (uint32_t Filler = Script<ELFT>::X->getFiller(this->Name)) + fill(Buf, this->Size, Filler); + + auto Fn = [=](InputSection<ELFT> *IS) { IS->writeTo(Buf); }; + forEach(Sections.begin(), Sections.end(), Fn); + + // Linker scripts may have BYTE()-family commands with which you + // can write arbitrary bytes to the output. Process them if any. + Script<ELFT>::X->writeDataBytes(this->Name, Buf); } template <class ELFT> EhOutputSection<ELFT>::EhOutputSection() - : OutputSectionBase<ELFT>(".eh_frame", SHT_PROGBITS, SHF_ALLOC) {} - -// Returns the first relocation that points to a region -// between Begin and Begin+Size. -template <class IntTy, class RelTy> -static const RelTy *getReloc(IntTy Begin, IntTy Size, ArrayRef<RelTy> &Rels) { - for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) { - if (I->r_offset < Begin) - continue; - - // Truncate Rels for fast access. That means we expect that the - // relocations are sorted and we are looking up symbols in - // sequential order. It is naturally satisfied for .eh_frame. 
- Rels = Rels.slice(I - Rels.begin()); - if (I->r_offset < Begin + Size) - return I; - return nullptr; - } - Rels = ArrayRef<RelTy>(); - return nullptr; -} + : OutputSectionBase(".eh_frame", SHT_PROGBITS, SHF_ALLOC) {} // Search for an existing CIE record or create a new one. // CIE records from input object files are uniquified by their contents // and where their relocations point to. template <class ELFT> template <class RelTy> -CieRecord *EhOutputSection<ELFT>::addCie(SectionPiece &Piece, - EhInputSection<ELFT> *Sec, - ArrayRef<RelTy> &Rels) { +CieRecord *EhOutputSection<ELFT>::addCie(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); const endianness E = ELFT::TargetEndianness; if (read32<E>(Piece.data().data() + 4) != 0) - fatal("CIE expected at beginning of .eh_frame: " + Sec->getSectionName()); + fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); SymbolBody *Personality = nullptr; - if (const RelTy *Rel = getReloc(Piece.InputOff, Piece.size(), Rels)) - Personality = &Sec->getFile()->getRelocTargetSym(*Rel); + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI != (unsigned)-1) + Personality = &Sec->getFile()->getRelocTargetSym(Rels[FirstRelI]); // Search for an existing CIE by CIE contents/relocation target pair. CieRecord *Cie = &CieMap[{Piece.data(), Personality}]; @@ -1014,13 +297,14 @@ CieRecord *EhOutputSection<ELFT>::addCie(SectionPiece &Piece, // points to a live function. 
template <class ELFT> template <class RelTy> -bool EhOutputSection<ELFT>::isFdeLive(SectionPiece &Piece, - EhInputSection<ELFT> *Sec, - ArrayRef<RelTy> &Rels) { - const RelTy *Rel = getReloc(Piece.InputOff, Piece.size(), Rels); - if (!Rel) - fatal("FDE doesn't reference another section"); - SymbolBody &B = Sec->getFile()->getRelocTargetSym(*Rel); +bool EhOutputSection<ELFT>::isFdeLive(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + fatal(toString(Sec) + ": FDE doesn't reference another section"); + const RelTy &Rel = Rels[FirstRelI]; + SymbolBody &B = Sec->getFile()->getRelocTargetSym(Rel); auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); if (!D || !D->Section) return false; @@ -1039,7 +323,7 @@ void EhOutputSection<ELFT>::addSectionAux(EhInputSection<ELFT> *Sec, const endianness E = ELFT::TargetEndianness; DenseMap<size_t, CieRecord *> OffsetToCie; - for (SectionPiece &Piece : Sec->Pieces) { + for (EhSectionPiece &Piece : Sec->Pieces) { // The empty record is the end marker. 
if (Piece.size() == 4) return; @@ -1047,16 +331,16 @@ void EhOutputSection<ELFT>::addSectionAux(EhInputSection<ELFT> *Sec, size_t Offset = Piece.InputOff; uint32_t ID = read32<E>(Piece.data().data() + 4); if (ID == 0) { - OffsetToCie[Offset] = addCie(Piece, Sec, Rels); + OffsetToCie[Offset] = addCie(Piece, Rels); continue; } uint32_t CieOffset = Offset + 4 - ID; CieRecord *Cie = OffsetToCie[CieOffset]; if (!Cie) - fatal("invalid CIE reference"); + fatal(toString(Sec) + ": invalid CIE reference"); - if (!isFdeLive(Piece, Sec, Rels)) + if (!isFdeLive(Piece, Rels)) continue; Cie->FdePieces.push_back(&Piece); NumFdes++; @@ -1064,7 +348,7 @@ void EhOutputSection<ELFT>::addSectionAux(EhInputSection<ELFT> *Sec, } template <class ELFT> -void EhOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { +void EhOutputSection<ELFT>::addSection(InputSectionData *C) { auto *Sec = cast<EhInputSection<ELFT>>(C); Sec->OutSec = this; this->updateAlignment(Sec->Alignment); @@ -1077,12 +361,11 @@ void EhOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { if (Sec->Pieces.empty()) return; - if (const Elf_Shdr *RelSec = Sec->RelocSection) { - ELFFile<ELFT> &Obj = Sec->getFile()->getObj(); - if (RelSec->sh_type == SHT_RELA) - addSectionAux(Sec, Obj.relas(RelSec)); + if (Sec->NumRelocations) { + if (Sec->AreRelocsRela) + addSectionAux(Sec, Sec->relas()); else - addSectionAux(Sec, Obj.rels(RelSec)); + addSectionAux(Sec, Sec->rels()); return; } addSectionAux(Sec, makeArrayRef<Elf_Rela>(nullptr, nullptr)); @@ -1098,7 +381,7 @@ static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) { } template <class ELFT> void EhOutputSection<ELFT>::finalize() { - if (this->Header.sh_size) + if (this->Size) return; // Already finalized. 
size_t Off = 0; @@ -1106,12 +389,12 @@ template <class ELFT> void EhOutputSection<ELFT>::finalize() { Cie->Piece->OutputOff = Off; Off += alignTo(Cie->Piece->size(), sizeof(uintX_t)); - for (SectionPiece *Fde : Cie->FdePieces) { + for (EhSectionPiece *Fde : Cie->FdePieces) { Fde->OutputOff = Off; Off += alignTo(Fde->size(), sizeof(uintX_t)); } } - this->Header.sh_size = Off; + this->Size = Off; } template <class ELFT> static uint64_t readFdeAddr(uint8_t *Buf, int Size) { @@ -1143,7 +426,7 @@ typename ELFT::uint EhOutputSection<ELFT>::getFdePc(uint8_t *Buf, size_t FdeOff, if ((Enc & 0x70) == DW_EH_PE_absptr) return Addr; if ((Enc & 0x70) == DW_EH_PE_pcrel) - return Addr + this->getVA() + Off; + return Addr + this->Addr + Off; fatal("unknown FDE size relative encoding"); } @@ -1153,7 +436,7 @@ template <class ELFT> void EhOutputSection<ELFT>::writeTo(uint8_t *Buf) { size_t CieOffset = Cie->Piece->OutputOff; writeCieFde<ELFT>(Buf + CieOffset, Cie->Piece->data()); - for (SectionPiece *Fde : Cie->FdePieces) { + for (EhSectionPiece *Fde : Cie->FdePieces) { size_t Off = Fde->OutputOff; writeCieFde<ELFT>(Buf + Off, Fde->data()); @@ -1169,13 +452,13 @@ template <class ELFT> void EhOutputSection<ELFT>::writeTo(uint8_t *Buf) { // Construct .eh_frame_hdr. .eh_frame_hdr is a binary search table // to get a FDE from an address to which FDE is applied. So here // we obtain two addresses and pass them to EhFrameHdr object. 
- if (Out<ELFT>::EhFrameHdr) { + if (In<ELFT>::EhFrameHdr) { for (CieRecord *Cie : Cies) { - uint8_t Enc = getFdeEncoding<ELFT>(Cie->Piece->data()); + uint8_t Enc = getFdeEncoding<ELFT>(Cie->Piece); for (SectionPiece *Fde : Cie->FdePieces) { uintX_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); - uintX_t FdeVA = this->getVA() + Fde->OutputOff; - Out<ELFT>::EhFrameHdr->addFde(Pc, FdeVA); + uintX_t FdeVA = this->Addr + Fde->OutputOff; + In<ELFT>::EhFrameHdr->addFde(Pc, FdeVA); } } } @@ -1184,620 +467,127 @@ template <class ELFT> void EhOutputSection<ELFT>::writeTo(uint8_t *Buf) { template <class ELFT> MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags, uintX_t Alignment) - : OutputSectionBase<ELFT>(Name, Type, Flags), + : OutputSectionBase(Name, Type, Flags), Builder(StringTableBuilder::RAW, Alignment) {} template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) { - if (shouldTailMerge()) { - StringRef Data = Builder.data(); - memcpy(Buf, Data.data(), Data.size()); - return; - } - for (const std::pair<CachedHash<StringRef>, size_t> &P : Builder.getMap()) { - StringRef Data = P.first.Val; - memcpy(Buf + P.second, Data.data(), Data.size()); - } -} - -static StringRef toStringRef(ArrayRef<uint8_t> A) { - return {(const char *)A.data(), A.size()}; + Builder.write(Buf); } template <class ELFT> -void MergeOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { +void MergeOutputSection<ELFT>::addSection(InputSectionData *C) { auto *Sec = cast<MergeInputSection<ELFT>>(C); Sec->OutSec = this; this->updateAlignment(Sec->Alignment); - this->Header.sh_entsize = Sec->getSectionHdr()->sh_entsize; + this->Entsize = Sec->Entsize; Sections.push_back(Sec); - - bool IsString = this->Header.sh_flags & SHF_STRINGS; - - for (SectionPiece &Piece : Sec->Pieces) { - if (!Piece.Live) - continue; - uintX_t OutputOffset = Builder.add(toStringRef(Piece.data())); - if (!IsString || !shouldTailMerge()) - Piece.OutputOff = OutputOffset; - } -} 
- -template <class ELFT> -unsigned MergeOutputSection<ELFT>::getOffset(StringRef Val) { - return Builder.getOffset(Val); } template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const { - return Config->Optimize >= 2 && this->Header.sh_flags & SHF_STRINGS; + return (this->Flags & SHF_STRINGS) && Config->Optimize >= 2; } -template <class ELFT> void MergeOutputSection<ELFT>::finalize() { - if (shouldTailMerge()) - Builder.finalize(); - this->Header.sh_size = Builder.getSize(); -} - -template <class ELFT> void MergeOutputSection<ELFT>::finalizePieces() { +template <class ELFT> void MergeOutputSection<ELFT>::finalizeTailMerge() { + // Add all string pieces to the string table builder to create section + // contents. for (MergeInputSection<ELFT> *Sec : Sections) - Sec->finalizePieces(); -} - -template <class ELFT> -StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic) - : OutputSectionBase<ELFT>(Name, SHT_STRTAB, - Dynamic ? (uintX_t)SHF_ALLOC : 0), - Dynamic(Dynamic) {} - -// Adds a string to the string table. If HashIt is true we hash and check for -// duplicates. It is optional because the name of global symbols are already -// uniqued and hashing them again has a big cost for a small value: uniquing -// them with some other string that happens to be the same. -template <class ELFT> -unsigned StringTableSection<ELFT>::addString(StringRef S, bool HashIt) { - if (HashIt) { - auto R = StringMap.insert(std::make_pair(S, Size)); - if (!R.second) - return R.first->second; - } - unsigned Ret = Size; - Size += S.size() + 1; - Strings.push_back(S); - return Ret; -} - -template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) { - // ELF string tables start with NUL byte, so advance the pointer by one. 
- ++Buf; - for (StringRef S : Strings) { - memcpy(Buf, S.data(), S.size()); - Buf += S.size() + 1; - } -} - -template <class ELFT> -typename ELFT::uint DynamicReloc<ELFT>::getOffset() const { - if (OutputSec) - return OutputSec->getVA() + OffsetInSec; - return InputSec->OutSec->getVA() + InputSec->getOffset(OffsetInSec); -} - -template <class ELFT> -typename ELFT::uint DynamicReloc<ELFT>::getAddend() const { - if (UseSymVA) - return Sym->getVA<ELFT>(Addend); - return Addend; -} - -template <class ELFT> uint32_t DynamicReloc<ELFT>::getSymIndex() const { - if (Sym && !UseSymVA) - return Sym->DynsymIndex; - return 0; -} - -template <class ELFT> -SymbolTableSection<ELFT>::SymbolTableSection( - StringTableSection<ELFT> &StrTabSec) - : OutputSectionBase<ELFT>(StrTabSec.isDynamic() ? ".dynsym" : ".symtab", - StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, - StrTabSec.isDynamic() ? (uintX_t)SHF_ALLOC : 0), - StrTabSec(StrTabSec) { - this->Header.sh_entsize = sizeof(Elf_Sym); - this->Header.sh_addralign = sizeof(uintX_t); -} - -// Orders symbols according to their positions in the GOT, -// in compliance with MIPS ABI rules. -// See "Global Offset Table" in Chapter 5 in the following document -// for detailed description: -// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -static bool sortMipsSymbols(const std::pair<SymbolBody *, unsigned> &L, - const std::pair<SymbolBody *, unsigned> &R) { - // Sort entries related to non-local preemptible symbols by GOT indexes. - // All other entries go to the first part of GOT in arbitrary order. 
- bool LIsInLocalGot = !L.first->IsInGlobalMipsGot; - bool RIsInLocalGot = !R.first->IsInGlobalMipsGot; - if (LIsInLocalGot || RIsInLocalGot) - return !RIsInLocalGot; - return L.first->GotIndex < R.first->GotIndex; -} - -static uint8_t getSymbolBinding(SymbolBody *Body) { - Symbol *S = Body->symbol(); - uint8_t Visibility = S->Visibility; - if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) - return STB_LOCAL; - if (Config->NoGnuUnique && S->Binding == STB_GNU_UNIQUE) - return STB_GLOBAL; - return S->Binding; -} - -template <class ELFT> void SymbolTableSection<ELFT>::finalize() { - if (this->Header.sh_size) - return; // Already finalized. - - this->Header.sh_size = getNumSymbols() * sizeof(Elf_Sym); - this->Header.sh_link = StrTabSec.SectionIndex; - this->Header.sh_info = NumLocals + 1; - - if (Config->Relocatable) { - size_t I = NumLocals; - for (const std::pair<SymbolBody *, size_t> &P : Symbols) - P.first->DynsymIndex = ++I; - return; - } - - if (!StrTabSec.isDynamic()) { - std::stable_sort(Symbols.begin(), Symbols.end(), - [](const std::pair<SymbolBody *, unsigned> &L, - const std::pair<SymbolBody *, unsigned> &R) { - return getSymbolBinding(L.first) == STB_LOCAL && - getSymbolBinding(R.first) != STB_LOCAL; - }); - return; - } - if (Out<ELFT>::GnuHashTab) - // NB: It also sorts Symbols to meet the GNU hash table requirements. - Out<ELFT>::GnuHashTab->addSymbols(Symbols); - else if (Config->EMachine == EM_MIPS) - std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); - size_t I = 0; - for (const std::pair<SymbolBody *, size_t> &P : Symbols) - P.first->DynsymIndex = ++I; -} - -template <class ELFT> -void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) { - Symbols.push_back({B, StrTabSec.addString(B->getName(), false)}); -} - -template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) { - Buf += sizeof(Elf_Sym); - - // All symbols with STB_LOCAL binding precede the weak and global symbols. 
- // .dynsym only contains global symbols. - if (!Config->DiscardAll && !StrTabSec.isDynamic()) - writeLocalSymbols(Buf); - - writeGlobalSymbols(Buf); -} - -template <class ELFT> -void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) { - // Iterate over all input object files to copy their local symbols - // to the output symbol table pointed by Buf. - for (const std::unique_ptr<ObjectFile<ELFT>> &File : - Symtab<ELFT>::X->getObjectFiles()) { - for (const std::pair<const DefinedRegular<ELFT> *, size_t> &P : - File->KeptLocalSyms) { - const DefinedRegular<ELFT> &Body = *P.first; - InputSectionBase<ELFT> *Section = Body.Section; - auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); - - if (!Section) { - ESym->st_shndx = SHN_ABS; - ESym->st_value = Body.Value; - } else { - const OutputSectionBase<ELFT> *OutSec = Section->OutSec; - ESym->st_shndx = OutSec->SectionIndex; - ESym->st_value = OutSec->getVA() + Section->getOffset(Body); - } - ESym->st_name = P.second; - ESym->st_size = Body.template getSize<ELFT>(); - ESym->setBindingAndType(STB_LOCAL, Body.Type); - Buf += sizeof(*ESym); - } - } -} - -template <class ELFT> -void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) { - // Write the internal symbol table contents to the output symbol table - // pointed by Buf. 
- auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); - for (const std::pair<SymbolBody *, size_t> &P : Symbols) { - SymbolBody *Body = P.first; - size_t StrOff = P.second; - - uint8_t Type = Body->Type; - uintX_t Size = Body->getSize<ELFT>(); - - ESym->setBindingAndType(getSymbolBinding(Body), Type); - ESym->st_size = Size; - ESym->st_name = StrOff; - ESym->setVisibility(Body->symbol()->Visibility); - ESym->st_value = Body->getVA<ELFT>(); - - if (const OutputSectionBase<ELFT> *OutSec = getOutputSection(Body)) - ESym->st_shndx = OutSec->SectionIndex; - else if (isa<DefinedRegular<ELFT>>(Body)) - ESym->st_shndx = SHN_ABS; - - // On MIPS we need to mark symbol which has a PLT entry and requires pointer - // equality by STO_MIPS_PLT flag. That is necessary to help dynamic linker - // distinguish such symbols and MIPS lazy-binding stubs. - // https://sourceware.org/ml/binutils/2008-07/txt00000.txt - if (Config->EMachine == EM_MIPS && Body->isInPlt() && - Body->NeedsCopyOrPltAddr) - ESym->st_other |= STO_MIPS_PLT; - ++ESym; - } -} - -template <class ELFT> -const OutputSectionBase<ELFT> * -SymbolTableSection<ELFT>::getOutputSection(SymbolBody *Sym) { - switch (Sym->kind()) { - case SymbolBody::DefinedSyntheticKind: - return cast<DefinedSynthetic<ELFT>>(Sym)->Section; - case SymbolBody::DefinedRegularKind: { - auto &D = cast<DefinedRegular<ELFT>>(*Sym); - if (D.Section) - return D.Section->OutSec; - break; - } - case SymbolBody::DefinedCommonKind: - return Out<ELFT>::Bss; - case SymbolBody::SharedKind: - if (cast<SharedSymbol<ELFT>>(Sym)->needsCopy()) - return Out<ELFT>::Bss; - break; - case SymbolBody::UndefinedKind: - case SymbolBody::LazyArchiveKind: - case SymbolBody::LazyObjectKind: - break; - case SymbolBody::DefinedBitcodeKind: - llvm_unreachable("should have been replaced"); - } - return nullptr; -} - -template <class ELFT> -VersionDefinitionSection<ELFT>::VersionDefinitionSection() - : OutputSectionBase<ELFT>(".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC) { - 
this->Header.sh_addralign = sizeof(uint32_t); -} - -static StringRef getFileDefName() { - if (!Config->SoName.empty()) - return Config->SoName; - return Config->OutputFile; -} - -template <class ELFT> void VersionDefinitionSection<ELFT>::finalize() { - FileDefNameOff = Out<ELFT>::DynStrTab->addString(getFileDefName()); - for (VersionDefinition &V : Config->VersionDefinitions) - V.NameOff = Out<ELFT>::DynStrTab->addString(V.Name); - - this->Header.sh_size = - (sizeof(Elf_Verdef) + sizeof(Elf_Verdaux)) * getVerDefNum(); - this->Header.sh_link = Out<ELFT>::DynStrTab->SectionIndex; - - // sh_info should be set to the number of definitions. This fact is missed in - // documentation, but confirmed by binutils community: - // https://sourceware.org/ml/binutils/2014-11/msg00355.html - this->Header.sh_info = getVerDefNum(); -} - -template <class ELFT> -void VersionDefinitionSection<ELFT>::writeOne(uint8_t *Buf, uint32_t Index, - StringRef Name, size_t NameOff) { - auto *Verdef = reinterpret_cast<Elf_Verdef *>(Buf); - Verdef->vd_version = 1; - Verdef->vd_cnt = 1; - Verdef->vd_aux = sizeof(Elf_Verdef); - Verdef->vd_next = sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); - Verdef->vd_flags = (Index == 1 ? VER_FLG_BASE : 0); - Verdef->vd_ndx = Index; - Verdef->vd_hash = hashSysv(Name); - - auto *Verdaux = reinterpret_cast<Elf_Verdaux *>(Buf + sizeof(Elf_Verdef)); - Verdaux->vda_name = NameOff; - Verdaux->vda_next = 0; -} - -template <class ELFT> -void VersionDefinitionSection<ELFT>::writeTo(uint8_t *Buf) { - writeOne(Buf, 1, getFileDefName(), FileDefNameOff); - - for (VersionDefinition &V : Config->VersionDefinitions) { - Buf += sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); - writeOne(Buf, V.Id, V.Name, V.NameOff); - } - - // Need to terminate the last version definition. 
- Elf_Verdef *Verdef = reinterpret_cast<Elf_Verdef *>(Buf); - Verdef->vd_next = 0; -} - -template <class ELFT> -VersionTableSection<ELFT>::VersionTableSection() - : OutputSectionBase<ELFT>(".gnu.version", SHT_GNU_versym, SHF_ALLOC) { - this->Header.sh_addralign = sizeof(uint16_t); -} - -template <class ELFT> void VersionTableSection<ELFT>::finalize() { - this->Header.sh_size = - sizeof(Elf_Versym) * (Out<ELFT>::DynSymTab->getSymbols().size() + 1); - this->Header.sh_entsize = sizeof(Elf_Versym); - // At the moment of june 2016 GNU docs does not mention that sh_link field - // should be set, but Sun docs do. Also readelf relies on this field. - this->Header.sh_link = Out<ELFT>::DynSymTab->SectionIndex; -} - -template <class ELFT> void VersionTableSection<ELFT>::writeTo(uint8_t *Buf) { - auto *OutVersym = reinterpret_cast<Elf_Versym *>(Buf) + 1; - for (const std::pair<SymbolBody *, size_t> &P : - Out<ELFT>::DynSymTab->getSymbols()) { - OutVersym->vs_index = P.first->symbol()->VersionId; - ++OutVersym; - } -} - -template <class ELFT> -VersionNeedSection<ELFT>::VersionNeedSection() - : OutputSectionBase<ELFT>(".gnu.version_r", SHT_GNU_verneed, SHF_ALLOC) { - this->Header.sh_addralign = sizeof(uint32_t); - - // Identifiers in verneed section start at 2 because 0 and 1 are reserved - // for VER_NDX_LOCAL and VER_NDX_GLOBAL. - // First identifiers are reserved by verdef section if it exist. - NextIndex = getVerDefNum() + 1; -} - -template <class ELFT> -void VersionNeedSection<ELFT>::addSymbol(SharedSymbol<ELFT> *SS) { - if (!SS->Verdef) { - SS->symbol()->VersionId = VER_NDX_GLOBAL; - return; - } - SharedFile<ELFT> *F = SS->file(); - // If we don't already know that we need an Elf_Verneed for this DSO, prepare - // to create one by adding it to our needed list and creating a dynstr entry - // for the soname. 
- if (F->VerdefMap.empty()) - Needed.push_back({F, Out<ELFT>::DynStrTab->addString(F->getSoName())}); - typename SharedFile<ELFT>::NeededVer &NV = F->VerdefMap[SS->Verdef]; - // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef, - // prepare to create one by allocating a version identifier and creating a - // dynstr entry for the version name. - if (NV.Index == 0) { - NV.StrTab = Out<ELFT>::DynStrTab->addString( - SS->file()->getStringTable().data() + SS->Verdef->getAux()->vda_name); - NV.Index = NextIndex++; - } - SS->symbol()->VersionId = NV.Index; -} - -template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) { - // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs. - auto *Verneed = reinterpret_cast<Elf_Verneed *>(Buf); - auto *Vernaux = reinterpret_cast<Elf_Vernaux *>(Verneed + Needed.size()); - - for (std::pair<SharedFile<ELFT> *, size_t> &P : Needed) { - // Create an Elf_Verneed for this DSO. - Verneed->vn_version = 1; - Verneed->vn_cnt = P.first->VerdefMap.size(); - Verneed->vn_file = P.second; - Verneed->vn_aux = - reinterpret_cast<char *>(Vernaux) - reinterpret_cast<char *>(Verneed); - Verneed->vn_next = sizeof(Elf_Verneed); - ++Verneed; - - // Create the Elf_Vernauxs for this Elf_Verneed. The loop iterates over - // VerdefMap, which will only contain references to needed version - // definitions. Each Elf_Vernaux is based on the information contained in - // the Elf_Verdef in the source DSO. This loop iterates over a std::map of - // pointers, but is deterministic because the pointers refer to Elf_Verdef - // data structures within a single input file. 
- for (auto &NV : P.first->VerdefMap) { - Vernaux->vna_hash = NV.first->vd_hash; - Vernaux->vna_flags = 0; - Vernaux->vna_other = NV.second.Index; - Vernaux->vna_name = NV.second.StrTab; - Vernaux->vna_next = sizeof(Elf_Vernaux); - ++Vernaux; - } - - Vernaux[-1].vna_next = 0; - } - Verneed[-1].vn_next = 0; -} - -template <class ELFT> void VersionNeedSection<ELFT>::finalize() { - this->Header.sh_link = Out<ELFT>::DynStrTab->SectionIndex; - this->Header.sh_info = Needed.size(); - unsigned Size = Needed.size() * sizeof(Elf_Verneed); - for (std::pair<SharedFile<ELFT> *, size_t> &P : Needed) - Size += P.first->VerdefMap.size() * sizeof(Elf_Vernaux); - this->Header.sh_size = Size; -} - -template <class ELFT> -BuildIdSection<ELFT>::BuildIdSection(size_t HashSize) - : OutputSectionBase<ELFT>(".note.gnu.build-id", SHT_NOTE, SHF_ALLOC), - HashSize(HashSize) { - // 16 bytes for the note section header. - this->Header.sh_size = 16 + HashSize; -} - -template <class ELFT> void BuildIdSection<ELFT>::writeTo(uint8_t *Buf) { - const endianness E = ELFT::TargetEndianness; - write32<E>(Buf, 4); // Name size - write32<E>(Buf + 4, HashSize); // Content size - write32<E>(Buf + 8, NT_GNU_BUILD_ID); // Type - memcpy(Buf + 12, "GNU", 4); // Name string - HashBuf = Buf + 16; -} - -template <class ELFT> -void BuildIdFnv1<ELFT>::writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) { - const endianness E = ELFT::TargetEndianness; + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Builder.add(Sec->getData(I)); - // 64-bit FNV-1 hash - uint64_t Hash = 0xcbf29ce484222325; - for (ArrayRef<uint8_t> Buf : Bufs) { - for (uint8_t B : Buf) { - Hash *= 0x100000001b3; - Hash ^= B; - } - } - write64<E>(this->HashBuf, Hash); -} + // Fix the string table content. After this, the contents will never change. 
+ Builder.finalize(); + this->Size = Builder.getSize(); -template <class ELFT> -void BuildIdMd5<ELFT>::writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) { - MD5 Hash; - for (ArrayRef<uint8_t> Buf : Bufs) - Hash.update(Buf); - MD5::MD5Result Res; - Hash.final(Res); - memcpy(this->HashBuf, Res, 16); -} - -template <class ELFT> -void BuildIdSha1<ELFT>::writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) { - SHA1 Hash; - for (ArrayRef<uint8_t> Buf : Bufs) - Hash.update(Buf); - memcpy(this->HashBuf, Hash.final().data(), 20); + // finalize() fixed tail-optimized strings, so we can now get + // offsets of strings. Get an offset for each string and save it + // to a corresponding StringPiece for easy access. + for (MergeInputSection<ELFT> *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); } -template <class ELFT> -BuildIdHexstring<ELFT>::BuildIdHexstring() - : BuildIdSection<ELFT>(Config->BuildIdVector.size()) {} +template <class ELFT> void MergeOutputSection<ELFT>::finalizeNoTailMerge() { + // Add all string pieces to the string table builder to create section + // contents. Because we are not tail-optimizing, offsets of strings are + // fixed when they are added to the builder (string table builder contains + // a hash table from strings to offsets). 
+ for (MergeInputSection<ELFT> *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); -template <class ELFT> -void BuildIdHexstring<ELFT>::writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) { - memcpy(this->HashBuf, Config->BuildIdVector.data(), - Config->BuildIdVector.size()); + Builder.finalizeInOrder(); + this->Size = Builder.getSize(); } -template <class ELFT> -MipsReginfoOutputSection<ELFT>::MipsReginfoOutputSection() - : OutputSectionBase<ELFT>(".reginfo", SHT_MIPS_REGINFO, SHF_ALLOC) { - this->Header.sh_addralign = 4; - this->Header.sh_entsize = sizeof(Elf_Mips_RegInfo); - this->Header.sh_size = sizeof(Elf_Mips_RegInfo); +template <class ELFT> void MergeOutputSection<ELFT>::finalize() { + if (shouldTailMerge()) + finalizeTailMerge(); + else + finalizeNoTailMerge(); } template <class ELFT> -void MipsReginfoOutputSection<ELFT>::writeTo(uint8_t *Buf) { - auto *R = reinterpret_cast<Elf_Mips_RegInfo *>(Buf); - R->ri_gp_value = Out<ELFT>::Got->getVA() + MipsGPOffset; - R->ri_gprmask = GprMask; +static typename ELFT::uint getOutFlags(InputSectionBase<ELFT> *S) { + return S->Flags & ~SHF_GROUP & ~SHF_COMPRESSED; } template <class ELFT> -void MipsReginfoOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { - // Copy input object file's .reginfo gprmask to output. 
- auto *S = cast<MipsReginfoInputSection<ELFT>>(C); - GprMask |= S->Reginfo->ri_gprmask; - S->OutSec = this; -} +static SectionKey<ELFT::Is64Bits> createKey(InputSectionBase<ELFT> *C, + StringRef OutsecName) { + typedef typename ELFT::uint uintX_t; + uintX_t Flags = getOutFlags(C); -template <class ELFT> -MipsOptionsOutputSection<ELFT>::MipsOptionsOutputSection() - : OutputSectionBase<ELFT>(".MIPS.options", SHT_MIPS_OPTIONS, - SHF_ALLOC | SHF_MIPS_NOSTRIP) { - this->Header.sh_addralign = 8; - this->Header.sh_entsize = 1; - this->Header.sh_size = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); -} + // For SHF_MERGE we create different output sections for each alignment. + // This makes each output section simple and keeps a single level mapping from + // input to output. + // In case of relocatable object generation we do not try to perform merging + // and treat SHF_MERGE sections as regular ones, but also create different + // output sections for them to allow merging at final linking stage. 
+ uintX_t Alignment = 0; + if (isa<MergeInputSection<ELFT>>(C) || + (Config->Relocatable && (C->Flags & SHF_MERGE))) + Alignment = std::max<uintX_t>(C->Alignment, C->Entsize); -template <class ELFT> -void MipsOptionsOutputSection<ELFT>::writeTo(uint8_t *Buf) { - auto *Opt = reinterpret_cast<Elf_Mips_Options *>(Buf); - Opt->kind = ODK_REGINFO; - Opt->size = this->Header.sh_size; - Opt->section = 0; - Opt->info = 0; - auto *Reg = reinterpret_cast<Elf_Mips_RegInfo *>(Buf + sizeof(*Opt)); - Reg->ri_gp_value = Out<ELFT>::Got->getVA() + MipsGPOffset; - Reg->ri_gprmask = GprMask; + return SectionKey<ELFT::Is64Bits>{OutsecName, C->Type, Flags, Alignment}; } template <class ELFT> -void MipsOptionsOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) { - auto *S = cast<MipsOptionsInputSection<ELFT>>(C); - if (S->Reginfo) - GprMask |= S->Reginfo->ri_gprmask; - S->OutSec = this; -} - -template <class ELFT> -std::pair<OutputSectionBase<ELFT> *, bool> +std::pair<OutputSectionBase *, bool> OutputSectionFactory<ELFT>::create(InputSectionBase<ELFT> *C, StringRef OutsecName) { SectionKey<ELFT::Is64Bits> Key = createKey(C, OutsecName); - OutputSectionBase<ELFT> *&Sec = Map[Key]; - if (Sec) + return create(Key, C); +} + +template <class ELFT> +std::pair<OutputSectionBase *, bool> +OutputSectionFactory<ELFT>::create(const SectionKey<ELFT::Is64Bits> &Key, + InputSectionBase<ELFT> *C) { + uintX_t Flags = getOutFlags(C); + OutputSectionBase *&Sec = Map[Key]; + if (Sec) { + Sec->Flags |= Flags; return {Sec, false}; + } - switch (C->SectionKind) { + uint32_t Type = C->Type; + switch (C->kind()) { case InputSectionBase<ELFT>::Regular: - Sec = new OutputSection<ELFT>(Key.Name, Key.Type, Key.Flags); + case InputSectionBase<ELFT>::Synthetic: + Sec = make<OutputSection<ELFT>>(Key.Name, Type, Flags); break; case InputSectionBase<ELFT>::EHFrame: return {Out<ELFT>::EhFrame, false}; case InputSectionBase<ELFT>::Merge: - Sec = new MergeOutputSection<ELFT>(Key.Name, Key.Type, Key.Flags, - 
Key.Alignment); - break; - case InputSectionBase<ELFT>::MipsReginfo: - Sec = new MipsReginfoOutputSection<ELFT>(); - break; - case InputSectionBase<ELFT>::MipsOptions: - Sec = new MipsOptionsOutputSection<ELFT>(); + Sec = make<MergeOutputSection<ELFT>>(Key.Name, Type, Flags, Key.Alignment); break; } return {Sec, true}; } -template <class ELFT> -OutputSectionBase<ELFT> *OutputSectionFactory<ELFT>::lookup(StringRef Name, - uint32_t Type, - uintX_t Flags) { - return Map.lookup({Name, Type, Flags, 0}); -} - -template <class ELFT> -SectionKey<ELFT::Is64Bits> -OutputSectionFactory<ELFT>::createKey(InputSectionBase<ELFT> *C, - StringRef OutsecName) { - const Elf_Shdr *H = C->getSectionHdr(); - uintX_t Flags = H->sh_flags & ~SHF_GROUP & ~SHF_COMPRESSED; - - // For SHF_MERGE we create different output sections for each alignment. - // This makes each output section simple and keeps a single level mapping from - // input to output. - uintX_t Alignment = 0; - if (isa<MergeInputSection<ELFT>>(C)) - Alignment = std::max(H->sh_addralign, H->sh_entsize); - - uint32_t Type = H->sh_type; - return SectionKey<ELFT::Is64Bits>{OutsecName, Type, Flags, Alignment}; -} - template <bool Is64Bits> typename lld::elf::SectionKey<Is64Bits> DenseMapInfo<lld::elf::SectionKey<Is64Bits>>::getEmptyKey() { @@ -1832,55 +622,11 @@ template struct DenseMapInfo<SectionKey<false>>; namespace lld { namespace elf { -template class OutputSectionBase<ELF32LE>; -template class OutputSectionBase<ELF32BE>; -template class OutputSectionBase<ELF64LE>; -template class OutputSectionBase<ELF64BE>; - -template class EhFrameHeader<ELF32LE>; -template class EhFrameHeader<ELF32BE>; -template class EhFrameHeader<ELF64LE>; -template class EhFrameHeader<ELF64BE>; - -template class GotPltSection<ELF32LE>; -template class GotPltSection<ELF32BE>; -template class GotPltSection<ELF64LE>; -template class GotPltSection<ELF64BE>; - -template class GotSection<ELF32LE>; -template class GotSection<ELF32BE>; -template class 
GotSection<ELF64LE>; -template class GotSection<ELF64BE>; - -template class PltSection<ELF32LE>; -template class PltSection<ELF32BE>; -template class PltSection<ELF64LE>; -template class PltSection<ELF64BE>; - -template class RelocationSection<ELF32LE>; -template class RelocationSection<ELF32BE>; -template class RelocationSection<ELF64LE>; -template class RelocationSection<ELF64BE>; - -template class InterpSection<ELF32LE>; -template class InterpSection<ELF32BE>; -template class InterpSection<ELF64LE>; -template class InterpSection<ELF64BE>; - -template class GnuHashTableSection<ELF32LE>; -template class GnuHashTableSection<ELF32BE>; -template class GnuHashTableSection<ELF64LE>; -template class GnuHashTableSection<ELF64BE>; - -template class HashTableSection<ELF32LE>; -template class HashTableSection<ELF32BE>; -template class HashTableSection<ELF64LE>; -template class HashTableSection<ELF64BE>; - -template class DynamicSection<ELF32LE>; -template class DynamicSection<ELF32BE>; -template class DynamicSection<ELF64LE>; -template class DynamicSection<ELF64BE>; + +template void OutputSectionBase::writeHeaderTo<ELF32LE>(ELF32LE::Shdr *Shdr); +template void OutputSectionBase::writeHeaderTo<ELF32BE>(ELF32BE::Shdr *Shdr); +template void OutputSectionBase::writeHeaderTo<ELF64LE>(ELF64LE::Shdr *Shdr); +template void OutputSectionBase::writeHeaderTo<ELF64BE>(ELF64BE::Shdr *Shdr); template class OutputSection<ELF32LE>; template class OutputSection<ELF32BE>; @@ -1892,71 +638,11 @@ template class EhOutputSection<ELF32BE>; template class EhOutputSection<ELF64LE>; template class EhOutputSection<ELF64BE>; -template class MipsReginfoOutputSection<ELF32LE>; -template class MipsReginfoOutputSection<ELF32BE>; -template class MipsReginfoOutputSection<ELF64LE>; -template class MipsReginfoOutputSection<ELF64BE>; - -template class MipsOptionsOutputSection<ELF32LE>; -template class MipsOptionsOutputSection<ELF32BE>; -template class MipsOptionsOutputSection<ELF64LE>; -template class 
MipsOptionsOutputSection<ELF64BE>; - template class MergeOutputSection<ELF32LE>; template class MergeOutputSection<ELF32BE>; template class MergeOutputSection<ELF64LE>; template class MergeOutputSection<ELF64BE>; -template class StringTableSection<ELF32LE>; -template class StringTableSection<ELF32BE>; -template class StringTableSection<ELF64LE>; -template class StringTableSection<ELF64BE>; - -template class SymbolTableSection<ELF32LE>; -template class SymbolTableSection<ELF32BE>; -template class SymbolTableSection<ELF64LE>; -template class SymbolTableSection<ELF64BE>; - -template class VersionTableSection<ELF32LE>; -template class VersionTableSection<ELF32BE>; -template class VersionTableSection<ELF64LE>; -template class VersionTableSection<ELF64BE>; - -template class VersionNeedSection<ELF32LE>; -template class VersionNeedSection<ELF32BE>; -template class VersionNeedSection<ELF64LE>; -template class VersionNeedSection<ELF64BE>; - -template class VersionDefinitionSection<ELF32LE>; -template class VersionDefinitionSection<ELF32BE>; -template class VersionDefinitionSection<ELF64LE>; -template class VersionDefinitionSection<ELF64BE>; - -template class BuildIdSection<ELF32LE>; -template class BuildIdSection<ELF32BE>; -template class BuildIdSection<ELF64LE>; -template class BuildIdSection<ELF64BE>; - -template class BuildIdFnv1<ELF32LE>; -template class BuildIdFnv1<ELF32BE>; -template class BuildIdFnv1<ELF64LE>; -template class BuildIdFnv1<ELF64BE>; - -template class BuildIdMd5<ELF32LE>; -template class BuildIdMd5<ELF32BE>; -template class BuildIdMd5<ELF64LE>; -template class BuildIdMd5<ELF64BE>; - -template class BuildIdSha1<ELF32LE>; -template class BuildIdSha1<ELF32BE>; -template class BuildIdSha1<ELF64LE>; -template class BuildIdSha1<ELF64BE>; - -template class BuildIdHexstring<ELF32LE>; -template class BuildIdHexstring<ELF32BE>; -template class BuildIdHexstring<ELF64LE>; -template class BuildIdHexstring<ELF64BE>; - template class OutputSectionFactory<ELF32LE>; 
template class OutputSectionFactory<ELF32BE>; template class OutputSectionFactory<ELF64LE>; diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.h b/contrib/llvm/tools/lld/ELF/OutputSections.h index 5fdf8de4cb46..978b1f8191ef 100644 --- a/contrib/llvm/tools/lld/ELF/OutputSections.h +++ b/contrib/llvm/tools/lld/ELF/OutputSections.h @@ -14,25 +14,19 @@ #include "Relocations.h" #include "lld/Core/LLVM.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/ELF.h" -#include "llvm/Support/MD5.h" -#include "llvm/Support/SHA1.h" namespace lld { namespace elf { +struct PhdrEntry; class SymbolBody; -struct SectionPiece; -template <class ELFT> class SymbolTable; -template <class ELFT> class SymbolTableSection; -template <class ELFT> class StringTableSection; +struct EhSectionPiece; template <class ELFT> class EhInputSection; template <class ELFT> class InputSection; template <class ELFT> class InputSectionBase; template <class ELFT> class MergeInputSection; -template <class ELFT> class MipsReginfoInputSection; template <class ELFT> class OutputSection; template <class ELFT> class ObjectFile; template <class ELFT> class SharedFile; @@ -44,341 +38,126 @@ template <class ELFT> class DefinedRegular; // input sections, others are created by the linker. // The writer creates multiple OutputSections and assign them unique, // non-overlapping file offsets and VAs. 
-template <class ELFT> class OutputSectionBase { +class OutputSectionBase { public: - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Shdr Elf_Shdr; + enum Kind { + Base, + EHFrame, + Merge, + Regular, + }; - OutputSectionBase(StringRef Name, uint32_t Type, uintX_t Flags); - void setVA(uintX_t VA) { Header.sh_addr = VA; } - uintX_t getVA() const { return Header.sh_addr; } - void setFileOffset(uintX_t Off) { Header.sh_offset = Off; } - void setSHName(unsigned Val) { Header.sh_name = Val; } - void writeHeaderTo(Elf_Shdr *SHdr); - StringRef getName() { return Name; } + OutputSectionBase(StringRef Name, uint32_t Type, uint64_t Flags); + void setLMAOffset(uint64_t LMAOff) { LMAOffset = LMAOff; } + uint64_t getLMA() const { return Addr + LMAOffset; } + template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr); + StringRef getName() const { return Name; } - virtual void addSection(InputSectionBase<ELFT> *C) {} + virtual void addSection(InputSectionData *C) {} + virtual Kind getKind() const { return Base; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == Base; + } unsigned SectionIndex; - // Returns the size of the section in the output file. - uintX_t getSize() const { return Header.sh_size; } - void setSize(uintX_t Val) { Header.sh_size = Val; } - uintX_t getFlags() const { return Header.sh_flags; } - uintX_t getFileOff() const { return Header.sh_offset; } - uintX_t getAlignment() const { return Header.sh_addralign; } - uint32_t getType() const { return Header.sh_type; } - - void updateAlignment(uintX_t Alignment) { - if (Alignment > Header.sh_addralign) - Header.sh_addralign = Alignment; + uint32_t getPhdrFlags() const; + + void updateAlignment(uint64_t Alignment) { + if (Alignment > Addralign) + Addralign = Alignment; } // If true, this section will be page aligned on disk. // Typically the first section of each PT_LOAD segment has this flag. 
bool PageAlign = false; + // Pointer to the first section in PT_LOAD segment, which this section + // also resides in. This field is used to correctly compute file offset + // of a section. When two sections share the same load segment, difference + // between their file offsets should be equal to difference between their + // virtual addresses. To compute some section offset we use the following + // formula: Off = Off_first + VA - VA_first. + OutputSectionBase *FirstInPtLoad = nullptr; + virtual void finalize() {} - virtual void finalizePieces() {} virtual void assignOffsets() {} virtual void writeTo(uint8_t *Buf) {} virtual ~OutputSectionBase() = default; -protected: StringRef Name; - Elf_Shdr Header; -}; -template <class ELFT> class GotSection final : public OutputSectionBase<ELFT> { - typedef OutputSectionBase<ELFT> Base; - typedef typename ELFT::uint uintX_t; - -public: - GotSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - void addEntry(SymbolBody &Sym); - void addMipsEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr); - bool addDynTlsEntry(SymbolBody &Sym); - bool addTlsIndex(); - bool empty() const { return MipsPageEntries == 0 && Entries.empty(); } - uintX_t getMipsLocalPageOffset(uintX_t Addr); - uintX_t getMipsGotOffset(const SymbolBody &B, uintX_t Addend) const; - uintX_t getGlobalDynAddr(const SymbolBody &B) const; - uintX_t getGlobalDynOffset(const SymbolBody &B) const; - uintX_t getNumEntries() const { return Entries.size(); } - - // Returns the symbol which corresponds to the first entry of the global part - // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic - // table properties. - // Returns nullptr if the global part is empty. - const SymbolBody *getMipsFirstGlobalEntry() const; - - // Returns the number of entries in the local part of GOT including - // the number of reserved entries. This method is MIPS-specific. 
- unsigned getMipsLocalEntriesNum() const; - - // Returns offset of TLS part of the MIPS GOT table. This part goes - // after 'local' and 'global' entries. - uintX_t getMipsTlsOffset(); - - uintX_t getTlsIndexVA() { return Base::getVA() + TlsIndexOff; } - uint32_t getTlsIndexOff() { return TlsIndexOff; } - - // Flag to force GOT to be in output if we have relocations - // that relies on its address. - bool HasGotOffRel = false; - -private: - std::vector<const SymbolBody *> Entries; - uint32_t TlsIndexOff = -1; - uint32_t MipsPageEntries = 0; - // Output sections referenced by MIPS GOT relocations. - llvm::SmallPtrSet<const OutputSectionBase<ELFT> *, 10> MipsOutSections; - llvm::DenseMap<uintX_t, size_t> MipsLocalGotPos; - - // MIPS ABI requires to create unique GOT entry for each Symbol/Addend - // pairs. The `MipsGotMap` maps (S,A) pair to the GOT index in the `MipsLocal` - // or `MipsGlobal` vectors. In general it does not have a sence to take in - // account addend for preemptible symbols because the corresponding - // GOT entries should have one-to-one mapping with dynamic symbols table. - // But we use the same container's types for both kind of GOT entries - // to handle them uniformly. - typedef std::pair<const SymbolBody*, uintX_t> MipsGotEntry; - typedef std::vector<MipsGotEntry> MipsGotEntries; - llvm::DenseMap<MipsGotEntry, size_t> MipsGotMap; - MipsGotEntries MipsLocal; - MipsGotEntries MipsGlobal; - - // Write MIPS-specific parts of the GOT. - void writeMipsGot(uint8_t *&Buf); + // The following fields correspond to Elf_Shdr members. 
+ uint64_t Size = 0; + uint64_t Entsize = 0; + uint64_t Addralign = 0; + uint64_t Offset = 0; + uint64_t Flags = 0; + uint64_t LMAOffset = 0; + uint64_t Addr = 0; + uint32_t ShName = 0; + uint32_t Type = 0; + uint32_t Info = 0; + uint32_t Link = 0; }; -template <class ELFT> -class GotPltSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::uint uintX_t; +template <class ELFT> class OutputSection final : public OutputSectionBase { public: - GotPltSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - void addEntry(SymbolBody &Sym); - bool empty() const; - -private: - std::vector<const SymbolBody *> Entries; -}; - -template <class ELFT> class PltSection final : public OutputSectionBase<ELFT> { - typedef OutputSectionBase<ELFT> Base; - typedef typename ELFT::uint uintX_t; - -public: - PltSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - void addEntry(SymbolBody &Sym); - bool empty() const { return Entries.empty(); } - -private: - std::vector<std::pair<const SymbolBody *, unsigned>> Entries; -}; - -template <class ELFT> class DynamicReloc { - typedef typename ELFT::uint uintX_t; - -public: - DynamicReloc(uint32_t Type, const InputSectionBase<ELFT> *InputSec, - uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, - uintX_t Addend) - : Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec), - UseSymVA(UseSymVA), Addend(Addend) {} - - DynamicReloc(uint32_t Type, const OutputSectionBase<ELFT> *OutputSec, - uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, - uintX_t Addend) - : Type(Type), Sym(Sym), OutputSec(OutputSec), OffsetInSec(OffsetInSec), - UseSymVA(UseSymVA), Addend(Addend) {} - - uintX_t getOffset() const; - uintX_t getAddend() const; - uint32_t getSymIndex() const; - const OutputSectionBase<ELFT> *getOutputSec() const { return OutputSec; } - - uint32_t Type; - -private: - SymbolBody *Sym; - const InputSectionBase<ELFT> *InputSec = nullptr; - const OutputSectionBase<ELFT> 
*OutputSec = nullptr; - uintX_t OffsetInSec; - bool UseSymVA; - uintX_t Addend; -}; - -template <class ELFT> -class SymbolTableSection final : public OutputSectionBase<ELFT> { -public: - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::uint uintX_t; - SymbolTableSection(StringTableSection<ELFT> &StrTabSec); - - void finalize() override; - void writeTo(uint8_t *Buf) override; - void addSymbol(SymbolBody *Body); - StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; } - unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; } - - ArrayRef<std::pair<SymbolBody *, size_t>> getSymbols() const { - return Symbols; - } - - unsigned NumLocals = 0; - StringTableSection<ELFT> &StrTabSec; - -private: - void writeLocalSymbols(uint8_t *&Buf); - void writeGlobalSymbols(uint8_t *Buf); - - const OutputSectionBase<ELFT> *getOutputSection(SymbolBody *Sym); - - // A vector of symbols and their string table offsets. - std::vector<std::pair<SymbolBody *, size_t>> Symbols; -}; - -// For more information about .gnu.version and .gnu.version_r see: -// https://www.akkadia.org/drepper/symbol-versioning - -// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall -// contain symbol version definitions. The number of entries in this section -// shall be contained in the DT_VERDEFNUM entry of the .dynamic section. -// The section shall contain an array of Elf_Verdef structures, optionally -// followed by an array of Elf_Verdaux structures. 
-template <class ELFT> -class VersionDefinitionSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::Verdef Elf_Verdef; - typedef typename ELFT::Verdaux Elf_Verdaux; - -public: - VersionDefinitionSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - -private: - void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff); - - unsigned FileDefNameOff; -}; - -// The .gnu.version section specifies the required version of each symbol in the -// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol -// table entry. An Elf_Versym is just a 16-bit integer that refers to a version -// identifier defined in the either .gnu.version_r or .gnu.version_d section. -// The values 0 and 1 are reserved. All other values are used for versions in -// the own object or in any of the dependencies. -template <class ELFT> -class VersionTableSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::Versym Elf_Versym; - -public: - VersionTableSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; -}; - -// The .gnu.version_r section defines the version identifiers used by -// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each -// Elf_Verneed specifies the version requirements for a single DSO, and contains -// a reference to a linked list of Elf_Vernaux data structures which define the -// mapping from version identifiers to version names. -template <class ELFT> -class VersionNeedSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::Verneed Elf_Verneed; - typedef typename ELFT::Vernaux Elf_Vernaux; - - // A vector of shared files that need Elf_Verneed data structures and the - // string table offsets of their sonames. - std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed; - - // The next available version identifier. 
- unsigned NextIndex; - -public: - VersionNeedSection(); - void addSymbol(SharedSymbol<ELFT> *SS); - void finalize() override; - void writeTo(uint8_t *Buf) override; - size_t getNeedNum() const { return Needed.size(); } -}; - -template <class ELFT> -class RelocationSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::uint uintX_t; - -public: - RelocationSection(StringRef Name, bool Sort); - void addReloc(const DynamicReloc<ELFT> &Reloc); - unsigned getRelocOffset(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - bool hasRelocs() const { return !Relocs.empty(); } - - bool Static = false; - -private: - bool Sort; - std::vector<DynamicReloc<ELFT>> Relocs; -}; - -template <class ELFT> -class OutputSection final : public OutputSectionBase<ELFT> { -public: typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Rel Elf_Rel; typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::uint uintX_t; OutputSection(StringRef Name, uint32_t Type, uintX_t Flags); - void addSection(InputSectionBase<ELFT> *C) override; + void addSection(InputSectionData *C) override; + void sort(std::function<int(InputSection<ELFT> *S)> Order); void sortInitFini(); void sortCtorsDtors(); void writeTo(uint8_t *Buf) override; void finalize() override; void assignOffsets() override; + Kind getKind() const override { return Regular; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == Regular; + } std::vector<InputSection<ELFT> *> Sections; + + // Location in the output buffer. 
+ uint8_t *Loc = nullptr; }; template <class ELFT> -class MergeOutputSection final : public OutputSectionBase<ELFT> { +class MergeOutputSection final : public OutputSectionBase { typedef typename ELFT::uint uintX_t; public: MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags, uintX_t Alignment); - void addSection(InputSectionBase<ELFT> *S) override; + void addSection(InputSectionData *S) override; void writeTo(uint8_t *Buf) override; - unsigned getOffset(StringRef Val); void finalize() override; - void finalizePieces() override; bool shouldTailMerge() const; + Kind getKind() const override { return Merge; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == Merge; + } private: + void finalizeTailMerge(); + void finalizeNoTailMerge(); + llvm::StringTableBuilder Builder; std::vector<MergeInputSection<ELFT> *> Sections; }; struct CieRecord { - SectionPiece *Piece = nullptr; - std::vector<SectionPiece *> FdePieces; + EhSectionPiece *Piece = nullptr; + std::vector<EhSectionPiece *> FdePieces; }; // Output section for .eh_frame. 
-template <class ELFT> -class EhOutputSection final : public OutputSectionBase<ELFT> { +template <class ELFT> class EhOutputSection final : public OutputSectionBase { typedef typename ELFT::uint uintX_t; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Rel Elf_Rel; @@ -390,7 +169,11 @@ public: void finalize() override; bool empty() const { return Sections.empty(); } - void addSection(InputSectionBase<ELFT> *S) override; + void addSection(InputSectionData *S) override; + Kind getKind() const override { return EHFrame; } + static bool classof(const OutputSectionBase *B) { + return B->getKind() == EHFrame; + } size_t NumFdes = 0; @@ -399,12 +182,10 @@ private: void addSectionAux(EhInputSection<ELFT> *S, llvm::ArrayRef<RelTy> Rels); template <class RelTy> - CieRecord *addCie(SectionPiece &Piece, EhInputSection<ELFT> *Sec, - ArrayRef<RelTy> &Rels); + CieRecord *addCie(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); template <class RelTy> - bool isFdeLive(SectionPiece &Piece, EhInputSection<ELFT> *Sec, - ArrayRef<RelTy> &Rels); + bool isFdeLive(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); uintX_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc); @@ -415,247 +196,25 @@ private: llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap; }; -template <class ELFT> -class InterpSection final : public OutputSectionBase<ELFT> { -public: - InterpSection(); - void writeTo(uint8_t *Buf) override; -}; - -template <class ELFT> -class StringTableSection final : public OutputSectionBase<ELFT> { -public: - typedef typename ELFT::uint uintX_t; - StringTableSection(StringRef Name, bool Dynamic); - unsigned addString(StringRef S, bool HashIt = true); - void writeTo(uint8_t *Buf) override; - unsigned getSize() const { return Size; } - void finalize() override { this->Header.sh_size = getSize(); } - bool isDynamic() const { return Dynamic; } - -private: - const bool Dynamic; - llvm::DenseMap<StringRef, unsigned> StringMap; - std::vector<StringRef> Strings; - 
unsigned Size = 1; // ELF string tables start with a NUL byte, so 1. -}; - -template <class ELFT> -class HashTableSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::Word Elf_Word; - -public: - HashTableSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; -}; - -// Outputs GNU Hash section. For detailed explanation see: -// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections -template <class ELFT> -class GnuHashTableSection final : public OutputSectionBase<ELFT> { - typedef typename ELFT::Off Elf_Off; - typedef typename ELFT::Word Elf_Word; - typedef typename ELFT::uint uintX_t; - -public: - GnuHashTableSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - - // Adds symbols to the hash table. - // Sorts the input to satisfy GNU hash section requirements. - void addSymbols(std::vector<std::pair<SymbolBody *, size_t>> &Symbols); - -private: - static unsigned calcNBuckets(unsigned NumHashed); - static unsigned calcMaskWords(unsigned NumHashed); - - void writeHeader(uint8_t *&Buf); - void writeBloomFilter(uint8_t *&Buf); - void writeHashTable(uint8_t *Buf); - - struct SymbolData { - SymbolBody *Body; - size_t STName; - uint32_t Hash; - }; - - std::vector<SymbolData> Symbols; - - unsigned MaskWords; - unsigned NBuckets; - unsigned Shift2; -}; - -template <class ELFT> -class DynamicSection final : public OutputSectionBase<ELFT> { - typedef OutputSectionBase<ELFT> Base; - typedef typename ELFT::Dyn Elf_Dyn; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; - - // The .dynamic section contains information for the dynamic linker. - // The section consists of fixed size entries, which consist of - // type and value fields. Value are one of plain integers, symbol - // addresses, or section addresses. This struct represents the entry. 
- struct Entry { - int32_t Tag; - union { - OutputSectionBase<ELFT> *OutSec; - uint64_t Val; - const SymbolBody *Sym; - }; - enum KindT { SecAddr, SymAddr, PlainInt } Kind; - Entry(int32_t Tag, OutputSectionBase<ELFT> *OutSec) - : Tag(Tag), OutSec(OutSec), Kind(SecAddr) {} - Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {} - Entry(int32_t Tag, const SymbolBody *Sym) - : Tag(Tag), Sym(Sym), Kind(SymAddr) {} - }; - - // finalize() fills this vector with the section contents. finalize() - // cannot directly create final section contents because when the - // function is called, symbol or section addresses are not fixed yet. - std::vector<Entry> Entries; - -public: - explicit DynamicSection(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - - OutputSectionBase<ELFT> *PreInitArraySec = nullptr; - OutputSectionBase<ELFT> *InitArraySec = nullptr; - OutputSectionBase<ELFT> *FiniArraySec = nullptr; -}; - -template <class ELFT> -class MipsReginfoOutputSection final : public OutputSectionBase<ELFT> { - typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; - -public: - MipsReginfoOutputSection(); - void writeTo(uint8_t *Buf) override; - void addSection(InputSectionBase<ELFT> *S) override; - -private: - uint32_t GprMask = 0; -}; - -template <class ELFT> -class MipsOptionsOutputSection final : public OutputSectionBase<ELFT> { - typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options; - typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; - -public: - MipsOptionsOutputSection(); - void writeTo(uint8_t *Buf) override; - void addSection(InputSectionBase<ELFT> *S) override; - -private: - uint32_t GprMask = 0; -}; - -// --eh-frame-hdr option tells linker to construct a header for all the -// .eh_frame sections. This header is placed to a section named .eh_frame_hdr -// and also to a PT_GNU_EH_FRAME segment. -// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by -// calling dl_iterate_phdr. 
-// This section contains a lookup table for quick binary search of FDEs. -// Detailed info about internals can be found in Ian Lance Taylor's blog: -// http://www.airs.com/blog/archives/460 (".eh_frame") -// http://www.airs.com/blog/archives/462 (".eh_frame_hdr") -template <class ELFT> -class EhFrameHeader final : public OutputSectionBase<ELFT> { - typedef typename ELFT::uint uintX_t; - -public: - EhFrameHeader(); - void finalize() override; - void writeTo(uint8_t *Buf) override; - void addFde(uint32_t Pc, uint32_t FdeVA); - -private: - struct FdeData { - uint32_t Pc; - uint32_t FdeVA; - }; - - std::vector<FdeData> Fdes; -}; - -template <class ELFT> class BuildIdSection : public OutputSectionBase<ELFT> { -public: - void writeTo(uint8_t *Buf) override; - virtual void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) = 0; - -protected: - BuildIdSection(size_t HashSize); - size_t HashSize; - uint8_t *HashBuf = nullptr; -}; - -template <class ELFT> class BuildIdFnv1 final : public BuildIdSection<ELFT> { -public: - BuildIdFnv1() : BuildIdSection<ELFT>(8) {} - void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override; -}; - -template <class ELFT> class BuildIdMd5 final : public BuildIdSection<ELFT> { -public: - BuildIdMd5() : BuildIdSection<ELFT>(16) {} - void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override; -}; - -template <class ELFT> class BuildIdSha1 final : public BuildIdSection<ELFT> { -public: - BuildIdSha1() : BuildIdSection<ELFT>(20) {} - void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override; -}; - -template <class ELFT> -class BuildIdHexstring final : public BuildIdSection<ELFT> { -public: - BuildIdHexstring(); - void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override; -}; - // All output sections that are hadnled by the linker specially are // globally accessible. Writer initializes them, so don't use them // until Writer is initialized. 
template <class ELFT> struct Out { typedef typename ELFT::uint uintX_t; typedef typename ELFT::Phdr Elf_Phdr; - static BuildIdSection<ELFT> *BuildId; - static DynamicSection<ELFT> *Dynamic; - static EhFrameHeader<ELFT> *EhFrameHdr; + + static uint8_t First; static EhOutputSection<ELFT> *EhFrame; - static GnuHashTableSection<ELFT> *GnuHashTab; - static GotPltSection<ELFT> *GotPlt; - static GotSection<ELFT> *Got; - static HashTableSection<ELFT> *HashTab; - static InterpSection<ELFT> *Interp; static OutputSection<ELFT> *Bss; - static OutputSection<ELFT> *MipsRldMap; - static OutputSectionBase<ELFT> *Opd; + static OutputSectionBase *Opd; static uint8_t *OpdBuf; - static PltSection<ELFT> *Plt; - static RelocationSection<ELFT> *RelaDyn; - static RelocationSection<ELFT> *RelaPlt; - static StringTableSection<ELFT> *DynStrTab; - static StringTableSection<ELFT> *ShStrTab; - static StringTableSection<ELFT> *StrTab; - static SymbolTableSection<ELFT> *DynSymTab; - static SymbolTableSection<ELFT> *SymTab; - static VersionDefinitionSection<ELFT> *VerDef; - static VersionTableSection<ELFT> *VerSym; - static VersionNeedSection<ELFT> *VerNeed; - static Elf_Phdr *TlsPhdr; - static OutputSectionBase<ELFT> *ElfHeader; - static OutputSectionBase<ELFT> *ProgramHeaders; + static PhdrEntry *TlsPhdr; + static OutputSectionBase *DebugInfo; + static OutputSectionBase *ElfHeader; + static OutputSectionBase *ProgramHeaders; + static OutputSectionBase *PreinitArray; + static OutputSectionBase *InitArray; + static OutputSectionBase *FiniArray; }; template <bool Is64Bits> struct SectionKey { @@ -676,45 +235,33 @@ template <class ELFT> class OutputSectionFactory { typedef typename elf::SectionKey<ELFT::Is64Bits> Key; public: - std::pair<OutputSectionBase<ELFT> *, bool> create(InputSectionBase<ELFT> *C, - StringRef OutsecName); - - OutputSectionBase<ELFT> *lookup(StringRef Name, uint32_t Type, uintX_t Flags); + std::pair<OutputSectionBase *, bool> create(InputSectionBase<ELFT> *C, + StringRef 
OutsecName); + std::pair<OutputSectionBase *, bool> + create(const SectionKey<ELFT::Is64Bits> &Key, InputSectionBase<ELFT> *C); private: - Key createKey(InputSectionBase<ELFT> *C, StringRef OutsecName); - - llvm::SmallDenseMap<Key, OutputSectionBase<ELFT> *> Map; + llvm::SmallDenseMap<Key, OutputSectionBase *> Map; }; -template <class ELFT> BuildIdSection<ELFT> *Out<ELFT>::BuildId; -template <class ELFT> DynamicSection<ELFT> *Out<ELFT>::Dynamic; -template <class ELFT> EhFrameHeader<ELFT> *Out<ELFT>::EhFrameHdr; +template <class ELFT> uint64_t getHeaderSize() { + if (Config->OFormatBinary) + return 0; + return Out<ELFT>::ElfHeader->Size + Out<ELFT>::ProgramHeaders->Size; +} + +template <class ELFT> uint8_t Out<ELFT>::First; template <class ELFT> EhOutputSection<ELFT> *Out<ELFT>::EhFrame; -template <class ELFT> GnuHashTableSection<ELFT> *Out<ELFT>::GnuHashTab; -template <class ELFT> GotPltSection<ELFT> *Out<ELFT>::GotPlt; -template <class ELFT> GotSection<ELFT> *Out<ELFT>::Got; -template <class ELFT> HashTableSection<ELFT> *Out<ELFT>::HashTab; -template <class ELFT> InterpSection<ELFT> *Out<ELFT>::Interp; template <class ELFT> OutputSection<ELFT> *Out<ELFT>::Bss; -template <class ELFT> OutputSection<ELFT> *Out<ELFT>::MipsRldMap; -template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::Opd; +template <class ELFT> OutputSectionBase *Out<ELFT>::Opd; template <class ELFT> uint8_t *Out<ELFT>::OpdBuf; -template <class ELFT> PltSection<ELFT> *Out<ELFT>::Plt; -template <class ELFT> RelocationSection<ELFT> *Out<ELFT>::RelaDyn; -template <class ELFT> RelocationSection<ELFT> *Out<ELFT>::RelaPlt; -template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::DynStrTab; -template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::ShStrTab; -template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::StrTab; -template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::DynSymTab; -template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::SymTab; -template <class ELFT> 
VersionDefinitionSection<ELFT> *Out<ELFT>::VerDef; -template <class ELFT> VersionTableSection<ELFT> *Out<ELFT>::VerSym; -template <class ELFT> VersionNeedSection<ELFT> *Out<ELFT>::VerNeed; -template <class ELFT> typename ELFT::Phdr *Out<ELFT>::TlsPhdr; -template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::ElfHeader; -template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::ProgramHeaders; - +template <class ELFT> PhdrEntry *Out<ELFT>::TlsPhdr; +template <class ELFT> OutputSectionBase *Out<ELFT>::DebugInfo; +template <class ELFT> OutputSectionBase *Out<ELFT>::ElfHeader; +template <class ELFT> OutputSectionBase *Out<ELFT>::ProgramHeaders; +template <class ELFT> OutputSectionBase *Out<ELFT>::PreinitArray; +template <class ELFT> OutputSectionBase *Out<ELFT>::InitArray; +template <class ELFT> OutputSectionBase *Out<ELFT>::FiniArray; } // namespace elf } // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp index c09cf6b2b1ef..f7dcc5d24e93 100644 --- a/contrib/llvm/tools/lld/ELF/Relocations.cpp +++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp @@ -44,7 +44,9 @@ #include "Relocations.h" #include "Config.h" #include "OutputSections.h" +#include "Strings.h" #include "SymbolTable.h" +#include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" @@ -60,11 +62,10 @@ namespace lld { namespace elf { static bool refersToGotEntry(RelExpr Expr) { - return Expr == R_GOT || Expr == R_GOT_OFF || Expr == R_MIPS_GOT_LOCAL_PAGE || - Expr == R_MIPS_GOT_OFF || Expr == R_MIPS_TLSGD || - Expr == R_MIPS_TLSLD || Expr == R_GOT_PAGE_PC || Expr == R_GOT_PC || - Expr == R_GOT_FROM_END || Expr == R_TLSGD || Expr == R_TLSGD_PC || - Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE; + return isRelExprOneOf<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF, + R_MIPS_GOT_OFF32, R_MIPS_TLSGD, R_MIPS_TLSLD, + R_GOT_PAGE_PC, R_GOT_PC, R_GOT_FROM_END, R_TLSGD, + R_TLSGD_PC, R_TLSDESC, R_TLSDESC_PAGE>(Expr); } static bool 
isPreemptible(const SymbolBody &Body, uint32_t Type) { @@ -83,34 +84,46 @@ static bool isPreemptible(const SymbolBody &Body, uint32_t Type) { return Body.isPreemptible(); } -// This function is similar to the `handleTlsRelocation`. MIPS does not support -// any relaxations for TLS relocations so by factoring out MIPS handling into -// the separate function we can simplify the code and does not pollute -// `handleTlsRelocation` by MIPS `ifs` statements. -template <class ELFT> -static unsigned -handleMipsTlsRelocation(uint32_t Type, SymbolBody &Body, - InputSectionBase<ELFT> &C, typename ELFT::uint Offset, - typename ELFT::uint Addend, RelExpr Expr) { - if (Expr == R_MIPS_TLSLD) { - if (Out<ELFT>::Got->addTlsIndex()) - Out<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, Out<ELFT>::Got, - Out<ELFT>::Got->getTlsIndexOff(), false, - nullptr, 0}); - C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body}); +// This function is similar to the `handleTlsRelocation`. ARM and MIPS do not +// support any relaxations for TLS relocations so by factoring out ARM and MIPS +// handling in to the separate function we can simplify the code and do not +// pollute `handleTlsRelocation` by ARM and MIPS `ifs` statements. +template <class ELFT, class GOT> +static unsigned handleNoRelaxTlsRelocation( + GOT *Got, uint32_t Type, SymbolBody &Body, InputSectionBase<ELFT> &C, + typename ELFT::uint Offset, typename ELFT::uint Addend, RelExpr Expr) { + typedef typename ELFT::uint uintX_t; + auto addModuleReloc = [](SymbolBody &Body, GOT *Got, uintX_t Off, bool LD) { + // The Dynamic TLS Module Index Relocation can be statically resolved to 1 + // if we know that we are linking an executable. For ARM we resolve the + // relocation when writing the Got. MIPS has a custom Got implementation + // that writes the Module index in directly. 
+ if (!Body.isPreemptible() && !Config->Pic && Config->EMachine == EM_ARM) + Got->Relocations.push_back( + {R_ABS, Target->TlsModuleIndexRel, Off, 0, &Body}); + else { + SymbolBody *Dest = LD ? nullptr : &Body; + In<ELFT>::RelaDyn->addReloc( + {Target->TlsModuleIndexRel, Got, Off, false, Dest, 0}); + } + }; + if (Expr == R_MIPS_TLSLD || Expr == R_TLSLD_PC) { + if (Got->addTlsIndex() && (Config->Pic || Config->EMachine == EM_ARM)) + addModuleReloc(Body, Got, Got->getTlsIndexOff(), true); + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; } if (Target->isTlsGlobalDynamicRel(Type)) { - if (Out<ELFT>::Got->addDynTlsEntry(Body)) { - typedef typename ELFT::uint uintX_t; - uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body); - Out<ELFT>::RelaDyn->addReloc( - {Target->TlsModuleIndexRel, Out<ELFT>::Got, Off, false, &Body, 0}); - Out<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Out<ELFT>::Got, - Off + (uintX_t)sizeof(uintX_t), false, - &Body, 0}); + if (Got->addDynTlsEntry(Body) && + (Body.isPreemptible() || Config->EMachine == EM_ARM)) { + uintX_t Off = Got->getGlobalDynOffset(Body); + addModuleReloc(Body, Got, Off, false); + if (Body.isPreemptible()) + In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Got, + Off + (uintX_t)sizeof(uintX_t), false, + &Body, 0}); } - C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body}); + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; } return 0; @@ -122,7 +135,7 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase<ELFT> &C, typename ELFT::uint Offset, typename ELFT::uint Addend, RelExpr Expr) { - if (!(C.getSectionHdr()->sh_flags & SHF_ALLOC)) + if (!(C.Flags & SHF_ALLOC)) return 0; if (!Body.isTls()) @@ -130,18 +143,23 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, typedef typename ELFT::uint uintX_t; + if (Config->EMachine == EM_ARM) + return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::Got, Type, Body, C, + Offset, 
Addend, Expr); if (Config->EMachine == EM_MIPS) - return handleMipsTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr); + return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::MipsGot, Type, Body, C, + Offset, Addend, Expr); - if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_HINT) && + bool IsPreemptible = isPreemptible(Body, Type); + if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC_CALL) && Config->Shared) { - if (Out<ELFT>::Got->addDynTlsEntry(Body)) { - uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body); - Out<ELFT>::RelaDyn->addReloc( - {Target->TlsDescRel, Out<ELFT>::Got, Off, false, &Body, 0}); + if (In<ELFT>::Got->addDynTlsEntry(Body)) { + uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); + In<ELFT>::RelaDyn->addReloc({Target->TlsDescRel, In<ELFT>::Got, Off, + !IsPreemptible, &Body, 0}); } - if (Expr != R_HINT) - C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body}); + if (Expr != R_TLSDESC_CALL) + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; } @@ -149,69 +167,71 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, // Local-Dynamic relocs can be relaxed to Local-Exec. if (!Config->Shared) { C.Relocations.push_back( - {R_RELAX_TLS_LD_TO_LE, Type, &C, Offset, Addend, &Body}); + {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body}); return 2; } - if (Out<ELFT>::Got->addTlsIndex()) - Out<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, Out<ELFT>::Got, - Out<ELFT>::Got->getTlsIndexOff(), false, - nullptr, 0}); - C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body}); + if (In<ELFT>::Got->addTlsIndex()) + In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, In<ELFT>::Got, + In<ELFT>::Got->getTlsIndexOff(), false, + nullptr, 0}); + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; } // Local-Dynamic relocs can be relaxed to Local-Exec. 
if (Target->isTlsLocalDynamicRel(Type) && !Config->Shared) { C.Relocations.push_back( - {R_RELAX_TLS_LD_TO_LE, Type, &C, Offset, Addend, &Body}); + {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body}); return 1; } - if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_HINT || + if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_TLSDESC_CALL || Target->isTlsGlobalDynamicRel(Type)) { if (Config->Shared) { - if (Out<ELFT>::Got->addDynTlsEntry(Body)) { - uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body); - Out<ELFT>::RelaDyn->addReloc( - {Target->TlsModuleIndexRel, Out<ELFT>::Got, Off, false, &Body, 0}); + if (In<ELFT>::Got->addDynTlsEntry(Body)) { + uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); + In<ELFT>::RelaDyn->addReloc( + {Target->TlsModuleIndexRel, In<ELFT>::Got, Off, false, &Body, 0}); // If the symbol is preemptible we need the dynamic linker to write // the offset too. - if (isPreemptible(Body, Type)) - Out<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Out<ELFT>::Got, - Off + (uintX_t)sizeof(uintX_t), false, - &Body, 0}); + uintX_t OffsetOff = Off + (uintX_t)sizeof(uintX_t); + if (IsPreemptible) + In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::Got, + OffsetOff, false, &Body, 0}); + else + In<ELFT>::Got->Relocations.push_back( + {R_ABS, Target->TlsOffsetRel, OffsetOff, 0, &Body}); } - C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body}); + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; } // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. 
- if (isPreemptible(Body, Type)) { + if (IsPreemptible) { C.Relocations.push_back( {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type, - &C, Offset, Addend, &Body}); + Offset, Addend, &Body}); if (!Body.isInGot()) { - Out<ELFT>::Got->addEntry(Body); - Out<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, Out<ELFT>::Got, - Body.getGotOffset<ELFT>(), false, &Body, - 0}); + In<ELFT>::Got->addEntry(Body); + In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::Got, + Body.getGotOffset<ELFT>(), false, &Body, + 0}); } return Target->TlsGdRelaxSkip; } C.Relocations.push_back( - {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, &C, + {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, Offset, Addend, &Body}); return Target->TlsGdRelaxSkip; } // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally // defined. - if (Target->isTlsInitialExecRel(Type) && !Config->Shared && - !isPreemptible(Body, Type)) { + if (Target->isTlsInitialExecRel(Type) && !Config->Shared && !IsPreemptible) { C.Relocations.push_back( - {R_RELAX_TLS_IE_TO_LE, Type, &C, Offset, Addend, &Body}); + {R_RELAX_TLS_IE_TO_LE, Type, Offset, Addend, &Body}); return 1; } return 0; @@ -260,8 +280,8 @@ static int32_t findMipsPairedAddend(const uint8_t *Buf, const uint8_t *BufLoc, return ((read32<E>(BufLoc) & 0xffff) << 16) + readSignedLo16<E>(Buf + RI->r_offset); } - warning("can't find matching " + getRelName(Type) + " relocation for " + - getRelName(Rel->getType(Config->Mips64EL))); + warn("can't find matching " + toString(Type) + " relocation for " + + toString(Rel->getType(Config->Mips64EL))); return 0; } @@ -275,27 +295,34 @@ template <class ELFT> static bool isAbsolute(const SymbolBody &Body) { return false; } +template <class ELFT> static bool isAbsoluteValue(const SymbolBody &Body) { + return isAbsolute<ELFT>(Body) || Body.isTls(); +} + static bool needsPlt(RelExpr Expr) { - return Expr == R_PLT_PC || Expr == R_PPC_PLT_OPD || Expr == 
R_PLT || - Expr == R_PLT_PAGE_PC || Expr == R_THUNK_PLT_PC; + return isRelExprOneOf<R_PLT_PC, R_PPC_PLT_OPD, R_PLT, R_PLT_PAGE_PC, + R_THUNK_PLT_PC>(Expr); } // True if this expression is of the form Sym - X, where X is a position in the // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { - return Expr == R_PC || Expr == R_GOTREL || Expr == R_PAGE_PC || - Expr == R_RELAX_GOT_PC || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC; + return isRelExprOneOf<R_PC, R_GOTREL, R_GOTREL_FROM_END, R_MIPS_GOTREL, + R_PAGE_PC, R_RELAX_GOT_PC, R_THUNK_PC, R_THUNK_PLT_PC>( + Expr); } template <class ELFT> static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, - const SymbolBody &Body) { + const SymbolBody &Body, + InputSectionBase<ELFT> &S, + typename ELFT::uint RelOff) { // These expressions always compute a constant - if (E == R_SIZE || E == R_GOT_FROM_END || E == R_GOT_OFF || - E == R_MIPS_GOT_LOCAL_PAGE || E == R_MIPS_GOT_OFF || E == R_MIPS_TLSGD || - E == R_GOT_PAGE_PC || E == R_GOT_PC || E == R_PLT_PC || E == R_TLSGD_PC || - E == R_TLSGD || E == R_PPC_PLT_OPD || E == R_TLSDESC_PAGE || - E == R_HINT || E == R_THUNK_PC || E == R_THUNK_PLT_PC) + if (isRelExprOneOf<R_SIZE, R_GOT_FROM_END, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, + R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_TLSGD, + R_GOT_PAGE_PC, R_GOT_PC, R_PLT_PC, R_TLSGD_PC, R_TLSGD, + R_PPC_PLT_OPD, R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT, + R_THUNK_PC, R_THUNK_PLT_PC>(E)) return true; // These never do, except if the entire file is position dependent or if @@ -309,7 +336,7 @@ static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, if (!Config->Pic) return true; - bool AbsVal = isAbsolute<ELFT>(Body) || Body.isTls(); + bool AbsVal = isAbsoluteValue<ELFT>(Body); bool RelE = isRelExpr(E); if (AbsVal && !RelE) return true; @@ -321,11 +348,17 @@ static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, // resolve to the image base. 
This is a little strange, but it allows us to // link function calls to such symbols. Normally such a call will be guarded // with a comparison, which will load a zero from the GOT. + // Another special case is MIPS _gp_disp symbol which represents offset + // between start of a function and '_gp' value and defined as absolute just + // to simplify the code. if (AbsVal && RelE) { if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) return true; - error("relocation " + getRelName(Type) + - " cannot refer to absolute symbol " + Body.getName()); + if (&Body == ElfSym<ELFT>::MipsGpDisp) + return true; + error(S.getLocation(RelOff) + ": relocation " + toString(Type) + + " cannot refer to absolute symbol '" + toString(Body) + + "' defined in " + toString(Body.File)); return true; } @@ -374,18 +407,18 @@ template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) { // Copy relocation against zero-sized symbol doesn't make sense. uintX_t SymSize = SS->template getSize<ELFT>(); if (SymSize == 0) - fatal("cannot create a copy relocation for " + SS->getName()); + fatal("cannot create a copy relocation for symbol " + toString(*SS)); uintX_t Alignment = getAlignment(SS); - uintX_t Off = alignTo(Out<ELFT>::Bss->getSize(), Alignment); - Out<ELFT>::Bss->setSize(Off + SymSize); + uintX_t Off = alignTo(Out<ELFT>::Bss->Size, Alignment); + Out<ELFT>::Bss->Size = Off + SymSize; Out<ELFT>::Bss->updateAlignment(Alignment); uintX_t Shndx = SS->Sym.st_shndx; uintX_t Value = SS->Sym.st_value; // Look through the DSO's dynamic symbol table for aliases and create a // dynamic symbol for each one. This causes the copy relocation to correctly // interpose any aliases. 
- for (const Elf_Sym &S : SS->file()->getElfSymbols(true)) { + for (const Elf_Sym &S : SS->file()->getGlobalSymbols()) { if (S.st_shndx != Shndx || S.st_value != Value) continue; auto *Alias = dyn_cast_or_null<SharedSymbol<ELFT>>( @@ -396,38 +429,43 @@ template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) { Alias->NeedsCopyOrPltAddr = true; Alias->symbol()->IsUsedInRegularObj = true; } - Out<ELFT>::RelaDyn->addReloc( + In<ELFT>::RelaDyn->addReloc( {Target->CopyRel, Out<ELFT>::Bss, SS->OffsetInBss, false, SS, 0}); } template <class ELFT> static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body, bool IsWrite, RelExpr Expr, uint32_t Type, - const uint8_t *Data) { + const uint8_t *Data, InputSectionBase<ELFT> &S, + typename ELFT::uint RelOff) { bool Preemptible = isPreemptible(Body, Type); if (Body.isGnuIFunc()) { Expr = toPlt(Expr); } else if (!Preemptible) { if (needsPlt(Expr)) Expr = fromPlt(Expr); - if (Expr == R_GOT_PC) + if (Expr == R_GOT_PC && !isAbsoluteValue<ELFT>(Body)) Expr = Target->adjustRelaxExpr(Type, Data, Expr); } Expr = Target->getThunkExpr(Expr, Type, File, Body); - if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body)) + if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, S, RelOff)) return Expr; // This relocation would require the dynamic linker to write a value to read // only memory. We can hack around it if we are producing an executable and // the refered symbol can be preemepted to refer to the executable. if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) { - error("can't create dynamic relocation " + getRelName(Type) + - " against readonly segment"); + error(S.getLocation(RelOff) + ": can't create dynamic relocation " + + toString(Type) + " against " + + (Body.getName().empty() ? 
"local symbol in readonly segment" + : "symbol '" + toString(Body) + "'") + + " defined in " + toString(Body.File)); return Expr; } if (Body.getVisibility() != STV_DEFAULT) { - error("cannot preempt symbol"); + error(S.getLocation(RelOff) + ": cannot preempt symbol '" + toString(Body) + + "' defined in " + toString(Body.File)); return Expr; } if (Body.isObject()) { @@ -461,7 +499,8 @@ static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body, Body.NeedsCopyOrPltAddr = true; return toPlt(Expr); } - error("symbol is missing type"); + error("symbol '" + toString(Body) + "' defined in " + toString(Body.File) + + " is missing type"); return Expr; } @@ -487,17 +526,49 @@ static typename ELFT::uint computeAddend(const elf::ObjectFile<ELFT> &File, // For details see p. 4-19 at // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf Addend += 4; - if (Expr == R_GOTREL) { - Addend -= MipsGPOffset; - if (Body.isLocal()) - Addend += File.getMipsGp0(); - } + if (Expr == R_MIPS_GOTREL && Body.isLocal()) + Addend += File.MipsGp0; } if (Config->Pic && Config->EMachine == EM_PPC64 && Type == R_PPC64_TOC) Addend += getPPC64TocBase(); return Addend; } +template <class ELFT> +static void reportUndefined(SymbolBody &Sym, InputSectionBase<ELFT> &S, + typename ELFT::uint Offset) { + if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore) + return; + + if (Config->Shared && Sym.symbol()->Visibility == STV_DEFAULT && + Config->UnresolvedSymbols != UnresolvedPolicy::NoUndef) + return; + + std::string Msg = + S.getLocation(Offset) + ": undefined symbol '" + toString(Sym) + "'"; + + if (Config->UnresolvedSymbols == UnresolvedPolicy::Warn) + warn(Msg); + else + error(Msg); +} + +template <class RelTy> +static std::pair<uint32_t, uint32_t> +mergeMipsN32RelTypes(uint32_t Type, uint32_t Offset, RelTy *I, RelTy *E) { + // MIPS N32 ABI treats series of successive relocations with the same offset + // as a single relocation. 
The similar approach used by N64 ABI, but this ABI + // packs all relocations into the single relocation record. Here we emulate + // this for the N32 ABI. Iterate over relocation with the same offset and put + // theirs types into the single bit-set. + uint32_t Processed = 0; + for (; I != E && Offset == I->r_offset; ++I) { + ++Processed; + Type |= I->getType(Config->Mips64EL) << (8 * Processed); + } + return std::make_pair(Type, Processed); +} + // The reason we have to do this early scan is as follows // * To mmap the output file, we need to know the size // * For that, we need to know how many dynamic relocs we will have. @@ -515,53 +586,90 @@ template <class ELFT, class RelTy> static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { typedef typename ELFT::uint uintX_t; - bool IsWrite = C.getSectionHdr()->sh_flags & SHF_WRITE; + bool IsWrite = C.Flags & SHF_WRITE; auto AddDyn = [=](const DynamicReloc<ELFT> &Reloc) { - Out<ELFT>::RelaDyn->addReloc(Reloc); + In<ELFT>::RelaDyn->addReloc(Reloc); }; - const elf::ObjectFile<ELFT> &File = *C.getFile(); - ArrayRef<uint8_t> SectionData = C.getSectionData(); + const elf::ObjectFile<ELFT> *File = C.getFile(); + ArrayRef<uint8_t> SectionData = C.Data; const uint8_t *Buf = SectionData.begin(); + + ArrayRef<EhSectionPiece> Pieces; + if (auto *Eh = dyn_cast<EhInputSection<ELFT>>(&C)) + Pieces = Eh->Pieces; + + ArrayRef<EhSectionPiece>::iterator PieceI = Pieces.begin(); + ArrayRef<EhSectionPiece>::iterator PieceE = Pieces.end(); + for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) { const RelTy &RI = *I; - SymbolBody &Body = File.getRelocTargetSym(RI); + SymbolBody &Body = File->getRelocTargetSym(RI); uint32_t Type = RI.getType(Config->Mips64EL); + if (Config->MipsN32Abi) { + uint32_t Processed; + std::tie(Type, Processed) = + mergeMipsN32RelTypes(Type, RI.r_offset, I + 1, E); + I += Processed; + } + + // We only report undefined symbols if they are referenced somewhere in the + // code. 
+ if (!Body.isLocal() && Body.isUndefined() && !Body.symbol()->isWeak()) + reportUndefined(Body, C, RI.r_offset); + RelExpr Expr = Target->getRelExpr(Type, Body); bool Preemptible = isPreemptible(Body, Type); - Expr = adjustExpr(File, Body, IsWrite, Expr, Type, Buf + RI.r_offset); - if (HasError) + Expr = adjustExpr(*File, Body, IsWrite, Expr, Type, Buf + RI.r_offset, C, + RI.r_offset); + if (ErrorCount) continue; // Skip a relocation that points to a dead piece - // in a mergeable section. - if (C.getOffset(RI.r_offset) == (uintX_t)-1) - continue; + // in a eh_frame section. + while (PieceI != PieceE && + (PieceI->InputOff + PieceI->size() <= RI.r_offset)) + ++PieceI; + + // Compute the offset of this section in the output section. We do it here + // to try to compute it only once. + uintX_t Offset; + if (PieceI != PieceE) { + assert(PieceI->InputOff <= RI.r_offset && "Relocation not in any piece"); + if (PieceI->OutputOff == -1) + continue; + Offset = PieceI->OutputOff + RI.r_offset - PieceI->InputOff; + } else { + Offset = RI.r_offset; + } // This relocation does not require got entry, but it is relative to got and // needs it to be created. Here we request for that. - if (Expr == R_GOTONLY_PC || Expr == R_GOTREL || Expr == R_PPC_TOC) - Out<ELFT>::Got->HasGotOffRel = true; + if (Expr == R_GOTONLY_PC || Expr == R_GOTONLY_PC_FROM_END || + Expr == R_GOTREL || Expr == R_GOTREL_FROM_END || Expr == R_PPC_TOC) + In<ELFT>::Got->HasGotOffRel = true; - uintX_t Addend = computeAddend(File, Buf, E, RI, Expr, Body); + uintX_t Addend = computeAddend(*File, Buf, E, RI, Expr, Body); - if (unsigned Processed = handleTlsRelocation<ELFT>( - Type, Body, C, RI.r_offset, Addend, Expr)) { + if (unsigned Processed = + handleTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr)) { I += (Processed - 1); continue; } - // Ignore "hint" relocation because it is for optional code optimization. 
- if (Expr == R_HINT) + // Ignore "hint" and TLS Descriptor call relocation because they are + // only markers for relaxation. + if (isRelExprOneOf<R_HINT, R_TLSDESC_CALL>(Expr)) continue; - if (needsPlt(Expr) || Expr == R_THUNK_ABS || Expr == R_THUNK_PC || - Expr == R_THUNK_PLT_PC || refersToGotEntry(Expr) || - !isPreemptible(Body, Type)) { + if (needsPlt(Expr) || + isRelExprOneOf<R_THUNK_ABS, R_THUNK_PC, R_THUNK_PLT_PC>(Expr) || + refersToGotEntry(Expr) || !isPreemptible(Body, Type)) { // If the relocation points to something in the file, we can process it. - bool Constant = isStaticLinkTimeConstant<ELFT>(Expr, Type, Body); + bool Constant = + isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, C, RI.r_offset); // If the output being produced is position independent, the final value // is still not known. In that case we still need some help from the @@ -569,17 +677,21 @@ static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { // relocation. We can process some of it and and just ask the dynamic // linker to add the load address. if (!Constant) - AddDyn({Target->RelativeRel, &C, RI.r_offset, true, &Body, Addend}); + AddDyn({Target->RelativeRel, &C, Offset, true, &Body, Addend}); // If the produced value is a constant, we just remember to write it // when outputting this section. We also have to do it if the format // uses Elf_Rel, since in that case the written value is the addend. if (Constant || !RelTy::IsRela) - C.Relocations.push_back({Expr, Type, &C, RI.r_offset, Addend, &Body}); + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); } else { // We don't know anything about the finaly symbol. Just ask the dynamic // linker to handle the relocation for us. 
- AddDyn({Target->getDynRel(Type), &C, RI.r_offset, false, &Body, Addend}); + if (!Target->isPicRel(Type)) + error(C.getLocation(Offset) + ": relocation " + toString(Type) + + " cannot be used against shared object; recompile with -fPIC."); + AddDyn({Target->getDynRel(Type), &C, Offset, false, &Body, Addend}); + // MIPS ABI turns using of GOT and dynamic relocations inside out. // While regular ABI uses dynamic relocations to fill up GOT entries // MIPS ABI requires dynamic linker to fills up GOT entries using @@ -596,18 +708,10 @@ static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { // a dynamic relocation. // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19 if (Config->EMachine == EM_MIPS) - Out<ELFT>::Got->addMipsEntry(Body, Addend, Expr); + In<ELFT>::MipsGot->addEntry(Body, Addend, Expr); continue; } - // Some targets might require creation of thunks for relocations. - // Now we support only MIPS which requires LA25 thunk to call PIC - // code from non-PIC one, and ARM which requires interworking. - if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) { - auto *Sec = cast<InputSection<ELFT>>(&C); - addThunk<ELFT>(Type, Body, *Sec); - } - // At this point we are done with the relocated position. Some relocations // also require us to create a got or plt entry. 
@@ -615,90 +719,107 @@ static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { if (needsPlt(Expr)) { if (Body.isInPlt()) continue; - Out<ELFT>::Plt->addEntry(Body); - - uint32_t Rel; - if (Body.isGnuIFunc() && !Preemptible) - Rel = Target->IRelativeRel; - else - Rel = Target->PltRel; - Out<ELFT>::GotPlt->addEntry(Body); - Out<ELFT>::RelaPlt->addReloc({Rel, Out<ELFT>::GotPlt, - Body.getGotPltOffset<ELFT>(), !Preemptible, - &Body, 0}); + if (Body.isGnuIFunc() && !Preemptible) { + In<ELFT>::Iplt->addEntry(Body); + In<ELFT>::IgotPlt->addEntry(Body); + In<ELFT>::RelaIplt->addReloc({Target->IRelativeRel, In<ELFT>::IgotPlt, + Body.getGotPltOffset<ELFT>(), + !Preemptible, &Body, 0}); + } else { + In<ELFT>::Plt->addEntry(Body); + In<ELFT>::GotPlt->addEntry(Body); + In<ELFT>::RelaPlt->addReloc({Target->PltRel, In<ELFT>::GotPlt, + Body.getGotPltOffset<ELFT>(), !Preemptible, + &Body, 0}); + } continue; } if (refersToGotEntry(Expr)) { if (Config->EMachine == EM_MIPS) { - // MIPS ABI has special rules to process GOT entries - // and doesn't require relocation entries for them. + // MIPS ABI has special rules to process GOT entries and doesn't + // require relocation entries for them. A special case is TLS + // relocations. In that case dynamic loader applies dynamic + // relocations to initialize TLS GOT entries. 
// See "Global Offset Table" in Chapter 5 in the following document // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - Out<ELFT>::Got->addMipsEntry(Body, Addend, Expr); - if (Body.isTls()) - AddDyn({Target->TlsGotRel, Out<ELFT>::Got, Body.getGotOffset<ELFT>(), - !Preemptible, &Body, 0}); + In<ELFT>::MipsGot->addEntry(Body, Addend, Expr); + if (Body.isTls() && Body.isPreemptible()) + AddDyn({Target->TlsGotRel, In<ELFT>::MipsGot, + Body.getGotOffset<ELFT>(), false, &Body, 0}); continue; } if (Body.isInGot()) continue; - Out<ELFT>::Got->addEntry(Body); - if (Preemptible || (Config->Pic && !isAbsolute<ELFT>(Body))) { - uint32_t DynType; - if (Body.isTls()) - DynType = Target->TlsGotRel; - else if (Preemptible) - DynType = Target->GotRel; - else - DynType = Target->RelativeRel; - AddDyn({DynType, Out<ELFT>::Got, Body.getGotOffset<ELFT>(), - !Preemptible, &Body, 0}); - } + In<ELFT>::Got->addEntry(Body); + uintX_t Off = Body.getGotOffset<ELFT>(); + uint32_t DynType; + RelExpr GotRE = R_ABS; + if (Body.isTls()) { + DynType = Target->TlsGotRel; + GotRE = R_TLS; + } else if (!Preemptible && Config->Pic && !isAbsolute<ELFT>(Body)) + DynType = Target->RelativeRel; + else + DynType = Target->GotRel; + + // FIXME: this logic is almost duplicated above. + bool Constant = !Preemptible && !(Config->Pic && !isAbsolute<ELFT>(Body)); + if (!Constant) + AddDyn({DynType, In<ELFT>::Got, Off, !Preemptible, &Body, 0}); + if (Constant || (!RelTy::IsRela && !Preemptible)) + In<ELFT>::Got->Relocations.push_back({GotRE, DynType, Off, 0, &Body}); continue; } } } -template <class ELFT> void scanRelocations(InputSection<ELFT> &C) { - typedef typename ELFT::Shdr Elf_Shdr; - - // Scan all relocations. Each relocation goes through a series - // of tests to determine if it needs special treatment, such as - // creating GOT, PLT, copy relocations, etc. 
- // Note that relocations for non-alloc sections are directly - // processed by InputSection::relocateNonAlloc. - if (C.getSectionHdr()->sh_flags & SHF_ALLOC) - for (const Elf_Shdr *RelSec : C.RelocSections) - scanRelocations(C, *RelSec); +template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &S) { + if (S.AreRelocsRela) + scanRelocs(S, S.relas()); + else + scanRelocs(S, S.rels()); } -template <class ELFT> -void scanRelocations(InputSectionBase<ELFT> &S, - const typename ELFT::Shdr &RelSec) { - ELFFile<ELFT> &EObj = S.getFile()->getObj(); - if (RelSec.sh_type == SHT_RELA) - scanRelocs(S, EObj.relas(&RelSec)); +template <class ELFT, class RelTy> +static void createThunks(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { + const elf::ObjectFile<ELFT> *File = C.getFile(); + for (const RelTy &Rel : Rels) { + SymbolBody &Body = File->getRelocTargetSym(Rel); + uint32_t Type = Rel.getType(Config->Mips64EL); + RelExpr Expr = Target->getRelExpr(Type, Body); + if (!isPreemptible(Body, Type) && needsPlt(Expr)) + Expr = fromPlt(Expr); + Expr = Target->getThunkExpr(Expr, Type, *File, Body); + // Some targets might require creation of thunks for relocations. + // Now we support only MIPS which requires LA25 thunk to call PIC + // code from non-PIC one, and ARM which requires interworking. 
+ if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) { + auto *Sec = cast<InputSection<ELFT>>(&C); + addThunk<ELFT>(Type, Body, *Sec); + } + } +} + +template <class ELFT> void createThunks(InputSectionBase<ELFT> &S) { + if (S.AreRelocsRela) + createThunks(S, S.relas()); else - scanRelocs(S, EObj.rels(&RelSec)); + createThunks(S, S.rels()); } -template void scanRelocations<ELF32LE>(InputSection<ELF32LE> &); -template void scanRelocations<ELF32BE>(InputSection<ELF32BE> &); -template void scanRelocations<ELF64LE>(InputSection<ELF64LE> &); -template void scanRelocations<ELF64BE>(InputSection<ELF64BE> &); - -template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &, - const ELF32LE::Shdr &); -template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &, - const ELF32BE::Shdr &); -template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &, - const ELF64LE::Shdr &); -template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &, - const ELF64BE::Shdr &); +template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &); +template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &); +template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &); +template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &); + +template void createThunks<ELF32LE>(InputSectionBase<ELF32LE> &); +template void createThunks<ELF32BE>(InputSectionBase<ELF32BE> &); +template void createThunks<ELF64LE>(InputSectionBase<ELF64LE> &); +template void createThunks<ELF64BE>(InputSectionBase<ELF64BE> &); } } diff --git a/contrib/llvm/tools/lld/ELF/Relocations.h b/contrib/llvm/tools/lld/ELF/Relocations.h index 4c1c74efb0da..b5825bdd5e59 100644 --- a/contrib/llvm/tools/lld/ELF/Relocations.h +++ b/contrib/llvm/tools/lld/ELF/Relocations.h @@ -15,14 +15,20 @@ namespace lld { namespace elf { class SymbolBody; +class InputSectionData; template <class ELFT> class InputSection; template <class ELFT> class InputSectionBase; +// List of 
target-independent relocation types. Relocations read +// from files are converted to these types so that the main code +// doesn't have to know about architecture-specific details. enum RelExpr { R_ABS, R_GOT, R_GOTONLY_PC, + R_GOTONLY_PC_FROM_END, R_GOTREL, + R_GOTREL_FROM_END, R_GOT_FROM_END, R_GOT_OFF, R_GOT_PAGE_PC, @@ -30,6 +36,8 @@ enum RelExpr { R_HINT, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF, + R_MIPS_GOT_OFF32, + R_MIPS_GOTREL, R_MIPS_TLSGD, R_MIPS_TLSLD, R_NEG_TLS, @@ -58,25 +66,54 @@ enum RelExpr { R_TLS, R_TLSDESC, R_TLSDESC_PAGE, + R_TLSDESC_CALL, R_TLSGD, R_TLSGD_PC, R_TLSLD, - R_TLSLD_PC + R_TLSLD_PC, }; -template <class ELFT> struct Relocation { +// Build a bitmask with one bit set for each RelExpr. +// +// Constexpr function arguments can't be used in static asserts, so we +// use template arguments to build the mask. +// But function template partial specializations don't exist (needed +// for base case of the recursion), so we need a dummy struct. +template <RelExpr... Exprs> struct RelExprMaskBuilder { + static inline uint64_t build() { return 0; } +}; + +// Specialization for recursive case. +template <RelExpr Head, RelExpr... Tail> +struct RelExprMaskBuilder<Head, Tail...> { + static inline uint64_t build() { + static_assert(0 <= Head && Head < 64, + "RelExpr is too large for 64-bit mask!"); + return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build(); + } +}; + +// Return true if `Expr` is one of `Exprs`. +// There are fewer than 64 RelExpr's, so we can represent any set of +// RelExpr's as a constant bit mask and test for membership with a +// couple cheap bitwise operations. +template <RelExpr... Exprs> bool isRelExprOneOf(RelExpr Expr) { + assert(0 <= Expr && (int)Expr < 64 && "RelExpr is too large for 64-bit mask!"); + return (uint64_t(1) << Expr) & RelExprMaskBuilder<Exprs...>::build(); +} + +// Architecture-neutral representation of relocation. 
+struct Relocation { RelExpr Expr; uint32_t Type; - InputSectionBase<ELFT> *InputSec; uint64_t Offset; uint64_t Addend; SymbolBody *Sym; }; -template <class ELFT> void scanRelocations(InputSection<ELFT> &); +template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &); -template <class ELFT> -void scanRelocations(InputSectionBase<ELFT> &, const typename ELFT::Shdr &); +template <class ELFT> void createThunks(InputSectionBase<ELFT> &); template <class ELFT> static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) { diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp index 559ec1be0e39..c740685a15a1 100644 --- a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp @@ -20,69 +20,101 @@ using namespace llvm; using namespace lld; using namespace lld::elf; -// Returns the line that the character S[Pos] is in. -static StringRef getLine(StringRef S, size_t Pos) { - size_t Begin = S.rfind('\n', Pos); - size_t End = S.find('\n', Pos); - Begin = (Begin == StringRef::npos) ? 0 : Begin + 1; - if (End == StringRef::npos) - End = S.size(); - // rtrim for DOS-style newlines. - return S.substr(Begin, End - Begin).rtrim(); +// Returns a whole line containing the current token. +StringRef ScriptParserBase::getLine() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + + size_t Pos = S.rfind('\n', Tok.data() - S.data()); + if (Pos != StringRef::npos) + S = S.substr(Pos + 1); + return S.substr(0, S.find_first_of("\r\n")); +} + +// Returns 1-based line number of the current token. +size_t ScriptParserBase::getLineNumber() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + return S.substr(0, Tok.data() - S.data()).count('\n') + 1; } -void ScriptParserBase::printErrorPos() { - StringRef Tok = Tokens[Pos == 0 ? 
0 : Pos - 1]; - StringRef Line = getLine(Input, Tok.data() - Input.data()); - size_t Col = Tok.data() - Line.data(); - error(Line); - error(std::string(Col, ' ') + "^"); +// Returns 0-based column number of the current token. +size_t ScriptParserBase::getColumnNumber() { + StringRef Tok = Tokens[Pos - 1]; + return Tok.data() - getLine().data(); } +std::string ScriptParserBase::getCurrentLocation() { + std::string Filename = getCurrentMB().getBufferIdentifier(); + if (!Pos) + return Filename; + return (Filename + ":" + Twine(getLineNumber())).str(); +} + +ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } + // We don't want to record cascading errors. Keep only the first one. void ScriptParserBase::setError(const Twine &Msg) { if (Error) return; - if (Input.empty() || Tokens.empty()) { - error(Msg); - } else { - error("line " + Twine(getPos()) + ": " + Msg); - printErrorPos(); - } Error = true; + + if (!Pos) { + error(getCurrentLocation() + ": " + Msg); + return; + } + + std::string S = getCurrentLocation() + ": "; + error(S + Msg); + error(S + getLine()); + error(S + std::string(getColumnNumber(), ' ') + "^"); } // Split S into linker script tokens. -std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) { - std::vector<StringRef> Ret; +void ScriptParserBase::tokenize(MemoryBufferRef MB) { + std::vector<StringRef> Vec; + MBs.push_back(MB); + StringRef S = MB.getBuffer(); + StringRef Begin = S; + for (;;) { S = skipSpace(S); if (S.empty()) - return Ret; + break; - // Quoted token + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted + // as glob patterns. Double-quoted tokens are literal patterns in that + // context. 
if (S.startswith("\"")) { size_t E = S.find("\"", 1); if (E == StringRef::npos) { - error("unclosed quote"); - return {}; + StringRef Filename = MB.getBufferIdentifier(); + size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); + error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); + return; } - Ret.push_back(S.substr(1, E - 1)); + + Vec.push_back(S.take_front(E + 1)); S = S.substr(E + 1); continue; } - // Unquoted token + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. size_t Pos = S.find_first_not_of( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-:!<>"); + "0123456789_.$/\\~=+[]*?-:!<>^"); + // A character that cannot start a word (which is usually a // punctuation) forms a single character token. if (Pos == 0) Pos = 1; - Ret.push_back(S.substr(0, Pos)); + Vec.push_back(S.substr(0, Pos)); S = S.substr(Pos); } + + Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); } // Skip leading whitespace characters or comments. @@ -132,19 +164,16 @@ StringRef ScriptParserBase::peek() { return Tok; } -bool ScriptParserBase::skip(StringRef Tok) { - if (Error) - return false; - if (atEOF()) { - setError("unexpected EOF"); - return false; +bool ScriptParserBase::consume(StringRef Tok) { + if (peek() == Tok) { + skip(); + return true; } - if (Tokens[Pos] != Tok) - return false; - ++Pos; - return true; + return false; } +void ScriptParserBase::skip() { (void)next(); } + void ScriptParserBase::expect(StringRef Expect) { if (Error) return; @@ -153,11 +182,19 @@ void ScriptParserBase::expect(StringRef Expect) { setError(Expect + " expected, but got " + Tok); } -// Returns the current line number. 
-size_t ScriptParserBase::getPos() { - if (Pos == 0) - return 1; - const char *Begin = Input.data(); - const char *Tok = Tokens[Pos - 1].data(); - return StringRef(Begin, Tok - Begin).count('\n') + 1; +// Returns true if S encloses T. +static bool encloses(StringRef S, StringRef T) { + return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); +} + +MemoryBufferRef ScriptParserBase::getCurrentMB() { + // Find input buffer containing the current token. + assert(!MBs.empty()); + if (!Pos) + return MBs[0]; + + for (MemoryBufferRef MB : MBs) + if (encloses(MB.getBuffer(), Tokens[Pos - 1])) + return MB; + llvm_unreachable("getCurrentMB: failed to find a token"); } diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.h b/contrib/llvm/tools/lld/ELF/ScriptParser.h index 20735f78da81..264c49792337 100644 --- a/contrib/llvm/tools/lld/ELF/ScriptParser.h +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.h @@ -12,6 +12,7 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" #include <utility> #include <vector> @@ -20,27 +21,30 @@ namespace elf { class ScriptParserBase { public: - explicit ScriptParserBase(StringRef S) : Input(S), Tokens(tokenize(S)) {} - explicit ScriptParserBase(std::vector<StringRef> Tokens) - : Input(""), Tokens(std::move(Tokens)) {} + explicit ScriptParserBase(MemoryBufferRef MB); -protected: void setError(const Twine &Msg); - static std::vector<StringRef> tokenize(StringRef S); + void tokenize(MemoryBufferRef MB); static StringRef skipSpace(StringRef S); bool atEOF(); StringRef next(); StringRef peek(); - bool skip(StringRef Tok); + void skip(); + bool consume(StringRef Tok); void expect(StringRef Expect); + std::string getCurrentLocation(); - size_t getPos(); - void printErrorPos(); - - StringRef Input; + std::vector<MemoryBufferRef> MBs; std::vector<StringRef> Tokens; size_t Pos = 0; bool Error = false; + +private: + StringRef getLine(); + size_t getLineNumber(); + size_t getColumnNumber(); + 
+ MemoryBufferRef getCurrentMB(); }; } // namespace elf diff --git a/contrib/llvm/tools/lld/ELF/Strings.cpp b/contrib/llvm/tools/lld/ELF/Strings.cpp index 0c21e8819d6c..ec3d1f1b2b51 100644 --- a/contrib/llvm/tools/lld/ELF/Strings.cpp +++ b/contrib/llvm/tools/lld/ELF/Strings.cpp @@ -8,44 +8,59 @@ //===----------------------------------------------------------------------===// #include "Strings.h" +#include "Config.h" #include "Error.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/Config/config.h" +#include "llvm/Demangle/Demangle.h" #include <algorithm> - -#ifdef HAVE_CXXABI_H -#include <cxxabi.h> -#endif +#include <cstring> using namespace llvm; using namespace lld; using namespace lld::elf; -// Returns true if S matches T. S can contain glob meta-characters. -// The asterisk ('*') matches zero or more characters, and the question -// mark ('?') matches one character. -bool elf::globMatch(StringRef S, StringRef T) { - for (;;) { - if (S.empty()) - return T.empty(); - if (S[0] == '*') { - S = S.substr(1); - if (S.empty()) - // Fast path. If a pattern is '*', it matches anything. - return true; - for (size_t I = 0, E = T.size(); I < E; ++I) - if (globMatch(S, T.substr(I))) - return true; - return false; - } - if (T.empty() || (S[0] != T[0] && S[0] != '?')) - return false; - S = S.substr(1); - T = T.substr(1); +StringMatcher::StringMatcher(ArrayRef<StringRef> Pat) { + for (StringRef S : Pat) { + Expected<GlobPattern> Pat = GlobPattern::create(S); + if (!Pat) + error(toString(Pat.takeError())); + else + Patterns.push_back(*Pat); } } +bool StringMatcher::match(StringRef S) const { + for (const GlobPattern &Pat : Patterns) + if (Pat.match(S)) + return true; + return false; +} + +// If an input string is in the form of "foo.N" where N is a number, +// return N. Otherwise, returns 65536, which is one greater than the +// lowest priority. 
+int elf::getPriority(StringRef S) { + size_t Pos = S.rfind('.'); + if (Pos == StringRef::npos) + return 65536; + int V; + if (S.substr(Pos + 1).getAsInteger(10, V)) + return 65536; + return V; +} + +bool elf::hasWildcard(StringRef S) { + return S.find_first_of("?*[") != StringRef::npos; +} + +StringRef elf::unquote(StringRef S) { + if (!S.startswith("\"")) + return S; + return S.substr(1, S.size() - 2); +} + // Converts a hex string (e.g. "deadbeef") to a vector. std::vector<uint8_t> elf::parseHex(StringRef S) { std::vector<uint8_t> Hex; @@ -75,24 +90,19 @@ bool elf::isValidCIdentifier(StringRef S) { } // Returns the demangled C++ symbol name for Name. -std::string elf::demangle(StringRef Name) { -#if !defined(HAVE_CXXABI_H) - return Name; -#else +Optional<std::string> elf::demangle(StringRef Name) { // __cxa_demangle can be used to demangle strings other than symbol // names which do not necessarily start with "_Z". Name can be // either a C or C++ symbol. Don't call __cxa_demangle if the name // does not look like a C++ symbol name to avoid getting unexpected // result for a C symbol that happens to match a mangled type name. 
if (!Name.startswith("_Z")) - return Name; + return None; - char *Buf = - abi::__cxa_demangle(Name.str().c_str(), nullptr, nullptr, nullptr); + char *Buf = itaniumDemangle(Name.str().c_str(), nullptr, nullptr, nullptr); if (!Buf) - return Name; + return None; std::string S(Buf); free(Buf); return S; -#endif } diff --git a/contrib/llvm/tools/lld/ELF/Strings.h b/contrib/llvm/tools/lld/ELF/Strings.h index 4948e9dbd56b..934b6427105f 100644 --- a/contrib/llvm/tools/lld/ELF/Strings.h +++ b/contrib/llvm/tools/lld/ELF/Strings.h @@ -7,22 +7,75 @@ // //===----------------------------------------------------------------------===// -#ifndef LLD_COFF_STRINGS_H -#define LLD_COFF_STRINGS_H +#ifndef LLD_ELF_STRINGS_H +#define LLD_ELF_STRINGS_H #include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/GlobPattern.h" #include <vector> namespace lld { namespace elf { -bool globMatch(StringRef S, StringRef T); + +int getPriority(StringRef S); +bool hasWildcard(StringRef S); std::vector<uint8_t> parseHex(StringRef S); bool isValidCIdentifier(StringRef S); +StringRef unquote(StringRef S); + +// This is a lazy version of StringRef. String size is computed lazily +// when it is needed. It is more efficient than StringRef to instantiate +// if you have a string whose size is unknown. +// +// ELF string tables contain a lot of null-terminated strings. +// Most of them are not necessary for the linker because they are names +// of local symbols and the linker doesn't use local symbol names for +// name resolution. So, we use this class to represents strings read +// from string tables. 
+class StringRefZ { +public: + StringRefZ() : Start(nullptr), Size(0) {} + StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {} + + /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {} + + /*implicit*/ StringRefZ(llvm::StringRef S) + : Start(S.data()), Size(S.size()) {} + + operator llvm::StringRef() const { + if (Size == (size_t)-1) + Size = strlen(Start); + return {Start, Size}; + } + +private: + const char *Start; + mutable size_t Size; +}; + +// This class represents multiple glob patterns. +class StringMatcher { +public: + StringMatcher() = default; + explicit StringMatcher(ArrayRef<StringRef> Pat); + + bool match(StringRef S) const; + +private: + std::vector<llvm::GlobPattern> Patterns; +}; // Returns a demangled C++ symbol name. If Name is not a mangled -// name or the system does not provide __cxa_demangle function, -// it returns an unmodified string. -std::string demangle(StringRef Name); +// name, it returns Optional::None. +llvm::Optional<std::string> demangle(StringRef Name); + +inline StringRef toStringRef(ArrayRef<uint8_t> Arr) { + return {(const char *)Arr.data(), Arr.size()}; +} } } diff --git a/contrib/llvm/tools/lld/ELF/SymbolListFile.cpp b/contrib/llvm/tools/lld/ELF/SymbolListFile.cpp deleted file mode 100644 index 9e088025c1b7..000000000000 --- a/contrib/llvm/tools/lld/ELF/SymbolListFile.cpp +++ /dev/null @@ -1,168 +0,0 @@ -//===- SymbolListFile.cpp -------------------------------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the parser/evaluator of the linker script. -// It does not construct an AST but consume linker script directives directly. -// Results are written to Driver or Config object. 
-// -//===----------------------------------------------------------------------===// - -#include "SymbolListFile.h" -#include "Config.h" -#include "ScriptParser.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; -using namespace llvm::ELF; - -using namespace lld; -using namespace lld::elf; - -// Parse the --dynamic-list argument. A dynamic list is in the form -// -// { symbol1; symbol2; [...]; symbolN }; -// -// Multiple groups can be defined in the same file, and they are merged -// into a single group. - -class DynamicListParser final : public ScriptParserBase { -public: - DynamicListParser(StringRef S) : ScriptParserBase(S) {} - void run(); -}; - -void DynamicListParser::run() { - while (!atEOF()) { - expect("{"); - while (!Error) { - Config->DynamicList.push_back(next()); - expect(";"); - if (skip("}")) - break; - } - expect(";"); - } -} - -void elf::parseDynamicList(MemoryBufferRef MB) { - DynamicListParser(MB.getBuffer()).run(); -} - -// Parse the --version-script argument. We currently only accept the following -// version script syntax: -// -// { [ global: symbol1; symbol2; [...]; symbolN; ] local: *; }; -// -// No wildcards are supported, other than for the local entry. Symbol versioning -// is also not supported. - -class VersionScriptParser final : public ScriptParserBase { -public: - VersionScriptParser(StringRef S) : ScriptParserBase(S) {} - - void run(); - -private: - void parseExtern(std::vector<SymbolVersion> *Globals); - void parseVersion(StringRef VerStr); - void parseGlobal(StringRef VerStr); - void parseLocal(); -}; - -size_t elf::defineSymbolVersion(StringRef VerStr) { - // Identifiers start at 2 because 0 and 1 are reserved - // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. 
- size_t VersionId = Config->VersionDefinitions.size() + 2; - Config->VersionDefinitions.push_back({VerStr, VersionId}); - return VersionId; -} - -void VersionScriptParser::parseVersion(StringRef VerStr) { - defineSymbolVersion(VerStr); - - if (skip("global:") || peek() != "local:") - parseGlobal(VerStr); - if (skip("local:")) - parseLocal(); - expect("}"); - - // Each version may have a parent version. For example, "Ver2" defined as - // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This - // version hierarchy is, probably against your instinct, purely for human; the - // runtime doesn't care about them at all. In LLD, we simply skip the token. - if (!VerStr.empty() && peek() != ";") - next(); - expect(";"); -} - -void VersionScriptParser::parseLocal() { - Config->DefaultSymbolVersion = VER_NDX_LOCAL; - expect("*"); - expect(";"); -} - -void VersionScriptParser::parseExtern(std::vector<SymbolVersion> *Globals) { - expect("C++"); - expect("{"); - - for (;;) { - if (peek() == "}" || Error) - break; - Globals->push_back({next(), true}); - expect(";"); - } - - expect("}"); - expect(";"); -} - -void VersionScriptParser::parseGlobal(StringRef VerStr) { - std::vector<SymbolVersion> *Globals; - if (VerStr.empty()) - Globals = &Config->VersionScriptGlobals; - else - Globals = &Config->VersionDefinitions.back().Globals; - - for (;;) { - if (skip("extern")) - parseExtern(Globals); - - StringRef Cur = peek(); - if (Cur == "}" || Cur == "local:" || Error) - return; - next(); - Globals->push_back({Cur, false}); - expect(";"); - } -} - -void VersionScriptParser::run() { - StringRef Msg = "anonymous version definition is used in " - "combination with other version definitions"; - if (skip("{")) { - parseVersion(""); - if (!atEOF()) - setError(Msg); - return; - } - - while (!atEOF() && !Error) { - StringRef VerStr = next(); - if (VerStr == "{") { - setError(Msg); - return; - } - expect("{"); - parseVersion(VerStr); - } -} - -void 
elf::parseVersionScript(MemoryBufferRef MB) { - VersionScriptParser(MB.getBuffer()).run(); -} diff --git a/contrib/llvm/tools/lld/ELF/SymbolListFile.h b/contrib/llvm/tools/lld/ELF/SymbolListFile.h deleted file mode 100644 index cf3c4c639ea4..000000000000 --- a/contrib/llvm/tools/lld/ELF/SymbolListFile.h +++ /dev/null @@ -1,27 +0,0 @@ -//===- SymbolListFile.h -----------------------------------------*- C++ -*-===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_ELF_SYMBOL_LIST_FILE_H -#define LLD_ELF_SYMBOL_LIST_FILE_H - -#include "lld/Core/LLVM.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace lld { -namespace elf { - -size_t defineSymbolVersion(StringRef Version); - -void parseDynamicList(MemoryBufferRef MB); -void parseVersionScript(MemoryBufferRef MB); - -} // namespace elf -} // namespace lld - -#endif diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp index 78c1298df427..79097e176e68 100644 --- a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp @@ -18,11 +18,9 @@ #include "Config.h" #include "Error.h" #include "LinkerScript.h" -#include "Strings.h" -#include "SymbolListFile.h" +#include "Memory.h" #include "Symbols.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Support/StringSaver.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; using namespace llvm::object; @@ -37,62 +35,69 @@ using namespace lld::elf; template <class ELFT> static bool isCompatible(InputFile *F) { if (!isa<ELFFileBase<ELFT>>(F) && !isa<BitcodeFile>(F)) return true; - if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) - return true; - StringRef A = F->getName(); - StringRef B = Config->Emulation; - if (B.empty()) - B = Config->FirstElf->getName(); - error(A + " is 
incompatible with " + B); + + if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) { + if (Config->EMachine != EM_MIPS) + return true; + if (isMipsN32Abi(F) == Config->MipsN32Abi) + return true; + } + + if (!Config->Emulation.empty()) + error(toString(F) + " is incompatible with " + Config->Emulation); + else + error(toString(F) + " is incompatible with " + toString(Config->FirstElf)); return false; } // Add symbols in File to the symbol table. -template <class ELFT> -void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) { - InputFile *FileP = File.get(); - if (!isCompatible<ELFT>(FileP)) +template <class ELFT> void SymbolTable<ELFT>::addFile(InputFile *File) { + if (!isCompatible<ELFT>(File)) + return; + + // Binary file + if (auto *F = dyn_cast<BinaryFile>(File)) { + BinaryFiles.push_back(F); + F->parse<ELFT>(); return; + } // .a file - if (auto *F = dyn_cast<ArchiveFile>(FileP)) { - ArchiveFiles.emplace_back(cast<ArchiveFile>(File.release())); + if (auto *F = dyn_cast<ArchiveFile>(File)) { F->parse<ELFT>(); return; } // Lazy object file - if (auto *F = dyn_cast<LazyObjectFile>(FileP)) { - LazyObjectFiles.emplace_back(cast<LazyObjectFile>(File.release())); + if (auto *F = dyn_cast<LazyObjectFile>(File)) { F->parse<ELFT>(); return; } if (Config->Trace) - outs() << getFilename(FileP) << "\n"; + outs() << toString(File) << "\n"; // .so file - if (auto *F = dyn_cast<SharedFile<ELFT>>(FileP)) { + if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) { // DSOs are uniquified not by filename but by soname. 
F->parseSoName(); - if (!SoNames.insert(F->getSoName()).second) + if (ErrorCount || !SoNames.insert(F->getSoName()).second) return; - - SharedFiles.emplace_back(cast<SharedFile<ELFT>>(File.release())); + SharedFiles.push_back(F); F->parseRest(); return; } // LLVM bitcode file - if (auto *F = dyn_cast<BitcodeFile>(FileP)) { - BitcodeFiles.emplace_back(cast<BitcodeFile>(File.release())); + if (auto *F = dyn_cast<BitcodeFile>(File)) { + BitcodeFiles.push_back(F); F->parse<ELFT>(ComdatGroups); return; } // Regular object file - auto *F = cast<ObjectFile<ELFT>>(FileP); - ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(File.release())); + auto *F = cast<ObjectFile<ELFT>>(File); + ObjectFiles.push_back(F); F->parse(ComdatGroups); } @@ -103,31 +108,30 @@ void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) { // using LLVM functions and replaces bitcode symbols with the results. // Because all bitcode files that consist of a program are passed // to the compiler at once, it can do whole-program optimization. -template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() { +template <class ELFT> void SymbolTable<ELFT>::addCombinedLTOObject() { if (BitcodeFiles.empty()) return; - // Compile bitcode files. - Lto.reset(new BitcodeCompiler); - for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles) - Lto->add(*F); - std::vector<std::unique_ptr<InputFile>> IFs = Lto->compile(); + // Compile bitcode files and replace bitcode symbols. + LTO.reset(new BitcodeCompiler); + for (BitcodeFile *F : BitcodeFiles) + LTO->add(*F); - // Replace bitcode symbols. 
- for (auto &IF : IFs) { - ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release()); - - DenseSet<StringRef> DummyGroups; + for (InputFile *File : LTO->compile()) { + ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(File); + DenseSet<CachedHashStringRef> DummyGroups; Obj->parse(DummyGroups); - ObjectFiles.emplace_back(Obj); + ObjectFiles.push_back(Obj); } } template <class ELFT> DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name, - uint8_t Visibility) { - return cast<DefinedRegular<ELFT>>( - addRegular(Name, STB_GLOBAL, Visibility)->body()); + uint8_t Visibility, + uint8_t Binding) { + Symbol *Sym = + addRegular(Name, Visibility, STT_NOTYPE, 0, 0, Binding, nullptr, nullptr); + return cast<DefinedRegular<ELFT>>(Sym->body()); } // Add Name as an "ignored" symbol. An ignored symbol is a regular @@ -135,7 +139,8 @@ DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name, template <class ELFT> DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name, uint8_t Visibility) { - if (!find(Name)) + SymbolBody *S = find(Name); + if (!S || !S->isUndefined()) return nullptr; return addAbsolute(Name, Visibility); } @@ -143,7 +148,7 @@ DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name, // Set a flag for --trace-symbol so that we can print out a log message // if a new symbol with the same name is inserted into the symbol table. template <class ELFT> void SymbolTable<ELFT>::trace(StringRef Name) { - Symtab.insert({Name, {-1, true}}); + Symtab.insert({CachedHashStringRef(Name), {-1, true}}); } // Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM. 
@@ -152,10 +157,10 @@ template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) { SymbolBody *B = find(Name); if (!B) return; - StringSaver Saver(Alloc); Symbol *Sym = B->symbol(); Symbol *Real = addUndefined(Saver.save("__real_" + Name)); Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name)); + // We rename symbols by replacing the old symbol's SymbolBody with the new // symbol's SymbolBody. This causes all SymbolBody pointers referring to the // old symbol to instead refer to the new symbol. @@ -174,24 +179,26 @@ static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) { // Find an existing symbol or create and insert a new one. template <class ELFT> std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) { - auto P = Symtab.insert({Name, {(int)SymVector.size(), false}}); + auto P = Symtab.insert( + {CachedHashStringRef(Name), SymIndex((int)SymVector.size(), false)}); SymIndex &V = P.first->second; bool IsNew = P.second; if (V.Idx == -1) { IsNew = true; - V = {(int)SymVector.size(), true}; + V = SymIndex((int)SymVector.size(), true); } Symbol *Sym; if (IsNew) { - Sym = new (Alloc) Symbol; + Sym = new (BAlloc) Symbol; + Sym->InVersionScript = false; Sym->Binding = STB_WEAK; Sym->Visibility = STV_DEFAULT; Sym->IsUsedInRegularObj = false; Sym->ExportDynamic = false; - Sym->VersionId = Config->DefaultSymbolVersion; Sym->Traced = V.Traced; + Sym->VersionId = Config->DefaultSymbolVersion; SymVector.push_back(Sym); } else { Sym = SymVector[V.Idx]; @@ -199,13 +206,20 @@ std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) { return {Sym, IsNew}; } +// Construct a string in the form of "Sym in File1 and File2". +// Used to construct an error message. +static std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile) { + return "'" + toString(*Existing) + "' in " + toString(Existing->File) + + " and " + toString(NewFile); +} + // Find an existing symbol or create and insert a new one, then apply the given // attributes. 
template <class ELFT> std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility, - bool CanOmitFromDynSym, bool IsUsedInRegularObj, - InputFile *File) { + bool CanOmitFromDynSym, InputFile *File) { + bool IsUsedInRegularObj = !File || File->kind() == InputFile::ObjectKind; Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); @@ -218,42 +232,31 @@ SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility, S->IsUsedInRegularObj = true; if (!WasInserted && S->body()->Type != SymbolBody::UnknownType && ((Type == STT_TLS) != S->body()->isTls())) - error("TLS attribute mismatch for symbol: " + - conflictMsg(S->body(), File)); + error("TLS attribute mismatch for symbol " + conflictMsg(S->body(), File)); return {S, WasInserted}; } -// Construct a string in the form of "Sym in File1 and File2". -// Used to construct an error message. -template <typename ELFT> -std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Existing, - InputFile *NewFile) { - std::string Sym = Existing->getName(); - if (Config->Demangle) - Sym = demangle(Sym); - return Sym + " in " + getFilename(Existing->File) + " and " + - getFilename(NewFile); -} - template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) { - return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0, + return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT, + /*Type*/ 0, /*CanOmitFromDynSym*/ false, /*File*/ nullptr); } +static uint8_t getVisibility(uint8_t StOther) { return StOther & 3; } + template <class ELFT> -Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding, - uint8_t StOther, uint8_t Type, - bool CanOmitFromDynSym, +Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal, + uint8_t Binding, uint8_t StOther, + uint8_t Type, bool CanOmitFromDynSym, InputFile *File) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = - insert(Name, Type, StOther & 3, CanOmitFromDynSym, - 
/*IsUsedInRegularObj*/ !File || !isa<BitcodeFile>(File), File); + insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, File); if (WasInserted) { S->Binding = Binding; - replaceBody<Undefined>(S, Name, StOther, Type, File); + replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File); return S; } if (Binding != STB_WEAK) { @@ -267,8 +270,8 @@ Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding, // its type. See also comment in addLazyArchive. if (S->isWeak()) L->Type = Type; - else if (auto F = L->fetch()) - addFile(std::move(F)); + else if (InputFile *F = L->fetch()) + addFile(F); } return S; } @@ -292,17 +295,24 @@ static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) { // We have a new non-common defined symbol with the specified binding. Return 1 // if the new symbol should win, -1 if the new symbol should lose, or 0 if there // is a conflict. If the new symbol wins, also update the binding. -static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding) { +template <typename ELFT> +static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding, + bool IsAbsolute, typename ELFT::uint Value) { if (int Cmp = compareDefined(S, WasInserted, Binding)) { if (Cmp > 0) S->Binding = Binding; return Cmp; } - if (isa<DefinedCommon>(S->body())) { + SymbolBody *B = S->body(); + if (isa<DefinedCommon>(B)) { // Non-common symbols take precedence over common symbols. 
if (Config->WarnCommon) - warning("common " + S->body()->getName() + " is overridden"); + warn("common " + S->body()->getName() + " is overridden"); return 1; + } else if (auto *R = dyn_cast<DefinedRegular<ELFT>>(B)) { + if (R->Section == nullptr && Binding == STB_GLOBAL && IsAbsolute && + R->Value == Value) + return -1; } return 0; } @@ -314,9 +324,8 @@ Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size, InputFile *File) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = - insert(N, Type, StOther & 3, /*CanOmitFromDynSym*/ false, - /*IsUsedInRegularObj*/ true, File); + std::tie(S, WasInserted) = insert(N, Type, getVisibility(StOther), + /*CanOmitFromDynSym*/ false, File); int Cmp = compareDefined(S, WasInserted, Binding); if (Cmp > 0) { S->Binding = Binding; @@ -326,74 +335,80 @@ Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size, if (!C) { // Non-common symbols take precedence over common symbols. if (Config->WarnCommon) - warning("common " + S->body()->getName() + " is overridden"); + warn("common " + S->body()->getName() + " is overridden"); return S; } if (Config->WarnCommon) - warning("multiple common of " + S->body()->getName()); + warn("multiple common of " + S->body()->getName()); - C->Size = std::max(C->Size, Size); - C->Alignment = std::max(C->Alignment, Alignment); + Alignment = C->Alignment = std::max(C->Alignment, Alignment); + if (Size > C->Size) + replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File); } return S; } -template <class ELFT> -void SymbolTable<ELFT>::reportDuplicate(SymbolBody *Existing, - InputFile *NewFile) { - std::string Msg = "duplicate symbol: " + conflictMsg(Existing, NewFile); +static void print(const Twine &Msg) { if (Config->AllowMultipleDefinition) - warning(Msg); + warn(Msg); else error(Msg); } -template <typename ELFT> -Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, const Elf_Sym &Sym, - InputSectionBase<ELFT> *Section) { - Symbol *S; - bool WasInserted; - 
std::tie(S, WasInserted) = - insert(Name, Sym.getType(), Sym.getVisibility(), - /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, - Section ? Section->getFile() : nullptr); - int Cmp = compareDefinedNonCommon(S, WasInserted, Sym.getBinding()); - if (Cmp > 0) - replaceBody<DefinedRegular<ELFT>>(S, Name, Sym, Section); - else if (Cmp == 0) - reportDuplicate(S->body(), Section->getFile()); - return S; +static void reportDuplicate(SymbolBody *Existing, InputFile *NewFile) { + print("duplicate symbol " + conflictMsg(Existing, NewFile)); +} + +template <class ELFT> +static void reportDuplicate(SymbolBody *Existing, + InputSectionBase<ELFT> *ErrSec, + typename ELFT::uint ErrOffset) { + DefinedRegular<ELFT> *D = dyn_cast<DefinedRegular<ELFT>>(Existing); + if (!D || !D->Section || !ErrSec) { + reportDuplicate(Existing, ErrSec ? ErrSec->getFile() : nullptr); + return; + } + + std::string OldLoc = D->Section->getLocation(D->Value); + std::string NewLoc = ErrSec->getLocation(ErrOffset); + + print(NewLoc + ": duplicate symbol '" + toString(*Existing) + "'"); + print(OldLoc + ": previous definition was here"); } template <typename ELFT> -Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t Binding, - uint8_t StOther) { +Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther, + uint8_t Type, uintX_t Value, uintX_t Size, + uint8_t Binding, + InputSectionBase<ELFT> *Section, + InputFile *File) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = - insert(Name, STT_NOTYPE, StOther & 3, /*CanOmitFromDynSym*/ false, - /*IsUsedInRegularObj*/ true, nullptr); - int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); + std::tie(S, WasInserted) = insert(Name, Type, getVisibility(StOther), + /*CanOmitFromDynSym*/ false, File); + int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding, + Section == nullptr, Value); if (Cmp > 0) - replaceBody<DefinedRegular<ELFT>>(S, Name, StOther); + replaceBody<DefinedRegular<ELFT>>(S, Name, 
/*IsLocal=*/false, StOther, Type, + Value, Size, Section, File); else if (Cmp == 0) - reportDuplicate(S->body(), nullptr); + reportDuplicate(S->body(), Section, Value); return S; } template <typename ELFT> Symbol *SymbolTable<ELFT>::addSynthetic(StringRef N, - OutputSectionBase<ELFT> *Section, - uintX_t Value) { + const OutputSectionBase *Section, + uintX_t Value, uint8_t StOther) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = - insert(N, STT_NOTYPE, STV_HIDDEN, /*CanOmitFromDynSym*/ false, - /*IsUsedInRegularObj*/ true, nullptr); - int Cmp = compareDefinedNonCommon(S, WasInserted, STB_GLOBAL); + std::tie(S, WasInserted) = insert(N, STT_NOTYPE, getVisibility(StOther), + /*CanOmitFromDynSym*/ false, nullptr); + int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, STB_GLOBAL, + /*IsAbsolute*/ false, /*Value*/ 0); if (Cmp > 0) - replaceBody<DefinedSynthetic<ELFT>>(S, N, Value, Section); + replaceBody<DefinedSynthetic>(S, N, Value, Section); else if (Cmp == 0) reportDuplicate(S->body(), nullptr); return S; @@ -409,11 +424,14 @@ void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name, Symbol *S; bool WasInserted; std::tie(S, WasInserted) = - insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, - /*IsUsedInRegularObj*/ false, F); + insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, F); // Make sure we preempt DSO symbols with default visibility. - if (Sym.getVisibility() == STV_DEFAULT) + if (Sym.getVisibility() == STV_DEFAULT) { S->ExportDynamic = true; + // Exporting preempting symbols takes precedence over linker scripts. 
+ if (S->VersionId == VER_NDX_LOCAL) + S->VersionId = VER_NDX_GLOBAL; + } if (WasInserted || isa<Undefined>(S->body())) { replaceBody<SharedSymbol<ELFT>>(S, F, Name, Sym, Verdef); if (!S->isWeak()) @@ -422,24 +440,25 @@ void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name, } template <class ELFT> -Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, bool IsWeak, +Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *F) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(Name, Type, StOther & 3, CanOmitFromDynSym, - /*IsUsedInRegularObj*/ false, F); - int Cmp = - compareDefinedNonCommon(S, WasInserted, IsWeak ? STB_WEAK : STB_GLOBAL); + std::tie(S, WasInserted) = + insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, F); + int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding, + /*IsAbs*/ false, /*Value*/ 0); if (Cmp > 0) - replaceBody<DefinedBitcode>(S, Name, StOther, Type, F); + replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type, + 0, 0, nullptr, F); else if (Cmp == 0) reportDuplicate(S->body(), F); return S; } template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) { - auto It = Symtab.find(Name); + auto It = Symtab.find(CachedHashStringRef(Name)); if (It == Symtab.end()) return nullptr; SymIndex V = It->second; @@ -448,24 +467,13 @@ template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) { return SymVector[V.Idx]->body(); } -// Returns a list of defined symbols that match with a given glob pattern. 
-template <class ELFT> -std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(StringRef Pattern) { - std::vector<SymbolBody *> Res; - for (Symbol *Sym : SymVector) { - SymbolBody *B = Sym->body(); - if (!B->isUndefined() && globMatch(Pattern, B->getName())) - Res.push_back(B); - } - return Res; -} - template <class ELFT> void SymbolTable<ELFT>::addLazyArchive(ArchiveFile *F, const object::Archive::Symbol Sym) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(Sym.getName()); + StringRef Name = Sym.getName(); + std::tie(S, WasInserted) = insert(Name); if (WasInserted) { replaceBody<LazyArchive>(S, *F, Sym, SymbolBody::UnknownType); return; @@ -484,9 +492,9 @@ void SymbolTable<ELFT>::addLazyArchive(ArchiveFile *F, replaceBody<LazyArchive>(S, *F, Sym, S->body()->Type); return; } - MemoryBufferRef MBRef = F->getMember(&Sym); - if (!MBRef.getBuffer().empty()) - addFile(createObjectFile(MBRef, F->getName())); + std::pair<MemoryBufferRef, uint64_t> MBInfo = F->getMember(&Sym); + if (!MBInfo.first.getBuffer().empty()) + addFile(createObjectFile(MBInfo.first, F->getName(), MBInfo.second)); } template <class ELFT> @@ -515,8 +523,8 @@ void SymbolTable<ELFT>::addLazyObject(StringRef Name, LazyObjectFile &Obj) { template <class ELFT> void SymbolTable<ELFT>::scanUndefinedFlags() { for (StringRef S : Config->Undefined) if (auto *L = dyn_cast_or_null<Lazy>(find(S))) - if (std::unique_ptr<InputFile> File = L->fetch()) - addFile(std::move(File)); + if (InputFile *File = L->fetch()) + addFile(File); } // This function takes care of the case in which shared libraries depend on @@ -527,184 +535,173 @@ template <class ELFT> void SymbolTable<ELFT>::scanUndefinedFlags() { // shared libraries can find them. // Except this, we ignore undefined symbols in DSOs. 
template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() { - for (std::unique_ptr<SharedFile<ELFT>> &File : SharedFiles) + for (SharedFile<ELFT> *File : SharedFiles) for (StringRef U : File->getUndefinedSymbols()) if (SymbolBody *Sym = find(U)) if (Sym->isDefined()) Sym->symbol()->ExportDynamic = true; } -// This function process the dynamic list option by marking all the symbols -// to be exported in the dynamic table. -template <class ELFT> void SymbolTable<ELFT>::scanDynamicList() { - for (StringRef S : Config->DynamicList) - if (SymbolBody *B = find(S)) - B->symbol()->ExportDynamic = true; +// Initialize DemangledSyms with a map from demangled symbols to symbol +// objects. Used to handle "extern C++" directive in version scripts. +// +// The map will contain all demangled symbols. That can be very large, +// and in LLD we generally want to avoid do anything for each symbol. +// Then, why are we doing this? Here's why. +// +// Users can use "extern C++ {}" directive to match against demangled +// C++ symbols. For example, you can write a pattern such as +// "llvm::*::foo(int, ?)". Obviously, there's no way to handle this +// other than trying to match a pattern against all demangled symbols. +// So, if "extern C++" feature is used, we need to demangle all known +// symbols. 
+template <class ELFT> +StringMap<std::vector<SymbolBody *>> &SymbolTable<ELFT>::getDemangledSyms() { + if (!DemangledSyms) { + DemangledSyms.emplace(); + for (Symbol *Sym : SymVector) { + SymbolBody *B = Sym->body(); + if (B->isUndefined()) + continue; + if (Optional<std::string> S = demangle(B->getName())) + (*DemangledSyms)[*S].push_back(B); + else + (*DemangledSyms)[B->getName()].push_back(B); + } + } + return *DemangledSyms; } -static bool hasWildcard(StringRef S) { - return S.find_first_of("?*") != StringRef::npos; +template <class ELFT> +std::vector<SymbolBody *> SymbolTable<ELFT>::findByVersion(SymbolVersion Ver) { + if (Ver.IsExternCpp) + return getDemangledSyms().lookup(Ver.Name); + if (SymbolBody *B = find(Ver.Name)) + if (!B->isUndefined()) + return {B}; + return {}; } -static void setVersionId(SymbolBody *Body, StringRef VersionName, - StringRef Name, uint16_t Version) { - if (!Body || Body->isUndefined()) { - if (Config->NoUndefinedVersion) - error("version script assignment of " + VersionName + " to symbol " + - Name + " failed: symbol not defined"); - return; - } +template <class ELFT> +std::vector<SymbolBody *> +SymbolTable<ELFT>::findAllByVersion(SymbolVersion Ver) { + std::vector<SymbolBody *> Res; + StringMatcher M(Ver.Name); - Symbol *Sym = Body->symbol(); - if (Sym->VersionId != Config->DefaultSymbolVersion) - warning("duplicate symbol " + Name + " in version script"); - Sym->VersionId = Version; -} + if (Ver.IsExternCpp) { + for (auto &P : getDemangledSyms()) + if (M.match(P.first())) + Res.insert(Res.end(), P.second.begin(), P.second.end()); + return Res; + } -template <class ELFT> -std::map<std::string, SymbolBody *> SymbolTable<ELFT>::getDemangledSyms() { - std::map<std::string, SymbolBody *> Result; for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); - Result[demangle(B->getName())] = B; + if (!B->isUndefined() && M.match(B->getName())) + Res.push_back(B); } - return Result; -} - -static bool hasExternCpp() { - for 
(VersionDefinition &V : Config->VersionDefinitions) - for (SymbolVersion Sym : V.Globals) - if (Sym.IsExternCpp) - return true; - return false; + return Res; } -// This function processes the --version-script option by marking all global -// symbols with the VersionScriptGlobal flag, which acts as a filter on the -// dynamic symbol table. -template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() { - // If version script does not contain versions declarations, - // we just should mark global symbols. - if (!Config->VersionScriptGlobals.empty()) { - for (SymbolVersion &Sym : Config->VersionScriptGlobals) - if (SymbolBody *B = find(Sym.Name)) +// If there's only one anonymous version definition in a version +// script file, the script does not actually define any symbol version, +// but just specifies symbols visibilities. We assume that the script was +// in the form of { global: foo; bar; local *; }. So, local is default. +// In this function, we make specified symbols global. +template <class ELFT> void SymbolTable<ELFT>::handleAnonymousVersion() { + for (SymbolVersion &Ver : Config->VersionScriptGlobals) { + if (Ver.HasWildcard) { + for (SymbolBody *B : findAllByVersion(Ver)) B->symbol()->VersionId = VER_NDX_GLOBAL; - return; + continue; + } + for (SymbolBody *B : findByVersion(Ver)) + B->symbol()->VersionId = VER_NDX_GLOBAL; } +} - if (Config->VersionDefinitions.empty()) +// Set symbol versions to symbols. This function handles patterns +// containing no wildcard characters. +template <class ELFT> +void SymbolTable<ELFT>::assignExactVersion(SymbolVersion Ver, uint16_t VersionId, + StringRef VersionName) { + if (Ver.HasWildcard) return; - // If we have symbols version declarations, we should - // assign version references for each symbol. - // Current rules are: - // * If there is an exact match for the mangled name or we have extern C++ - // exact match, then we use it. - // * Otherwise, we look through the wildcard patterns. 
We look through the - // version tags in reverse order. We use the first match we find (the last - // matching version tag in the file). - // Handle exact matches and build a map of demangled externs for - // quick search during next step. - std::map<std::string, SymbolBody *> Demangled; - if (hasExternCpp()) - Demangled = getDemangledSyms(); - - for (VersionDefinition &V : Config->VersionDefinitions) { - for (SymbolVersion Sym : V.Globals) { - if (hasWildcard(Sym.Name)) - continue; - SymbolBody *B = Sym.IsExternCpp ? Demangled[Sym.Name] : find(Sym.Name); - setVersionId(B, V.Name, Sym.Name, V.Id); - } + // Get a list of symbols which we need to assign the version to. + std::vector<SymbolBody *> Syms = findByVersion(Ver); + if (Syms.empty()) { + if (Config->NoUndefinedVersion) + error("version script assignment of '" + VersionName + "' to symbol '" + + Ver.Name + "' failed: symbol not defined"); + return; } - // Handle wildcards. - for (size_t I = Config->VersionDefinitions.size() - 1; I != (size_t)-1; --I) { - VersionDefinition &V = Config->VersionDefinitions[I]; - for (SymbolVersion &Sym : V.Globals) - if (hasWildcard(Sym.Name)) - for (SymbolBody *B : findAll(Sym.Name)) - if (B->symbol()->VersionId == Config->DefaultSymbolVersion) - B->symbol()->VersionId = V.Id; + // Assign the version. + for (SymbolBody *B : Syms) { + Symbol *Sym = B->symbol(); + if (Sym->InVersionScript) + warn("duplicate symbol '" + Ver.Name + "' in version script"); + Sym->VersionId = VersionId; + Sym->InVersionScript = true; } } -// Returns the size of the longest version name. -static int getMaxVersionLen() { - size_t Len = 0; - for (VersionDefinition &V : Config->VersionDefinitions) - Len = std::max(Len, V.Name.size()); - return Len; -} - -// Parses a symbol name in the form of <name>@<version> or <name>@@<version>. 
-static std::pair<StringRef, uint16_t> -getSymbolVersion(SymbolBody *B, int MaxVersionLen) { - StringRef S = B->getName(); - - // MaxVersionLen was passed so that we don't need to scan - // all characters in a symbol name. It is effective because - // versions are usually short and symbol names can be very long. - size_t Pos = S.find('@', std::max(0, int(S.size()) - MaxVersionLen - 2)); - if (Pos == 0 || Pos == StringRef::npos) - return {"", 0}; - - StringRef Name = S.substr(0, Pos); - StringRef Verstr = S.substr(Pos + 1); - if (Verstr.empty()) - return {"", 0}; - - // '@@' in a symbol name means the default version. - // It is usually the most recent one. - bool IsDefault = (Verstr[0] == '@'); - if (IsDefault) - Verstr = Verstr.substr(1); - - for (VersionDefinition &V : Config->VersionDefinitions) { - if (V.Name == Verstr) - return {Name, IsDefault ? V.Id : (V.Id | VERSYM_HIDDEN)}; - } +template <class ELFT> +void SymbolTable<ELFT>::assignWildcardVersion(SymbolVersion Ver, + uint16_t VersionId) { + if (!Ver.HasWildcard) + return; + std::vector<SymbolBody *> Syms = findAllByVersion(Ver); - // It is an error if the specified version was not defined. - error("symbol " + S + " has undefined version " + Verstr); - return {"", 0}; + // Exact matching takes precendence over fuzzy matching, + // so we set a version to a symbol only if no version has been assigned + // to the symbol. This behavior is compatible with GNU. + for (SymbolBody *B : Syms) + if (B->symbol()->VersionId == Config->DefaultSymbolVersion) + B->symbol()->VersionId = VersionId; } -// Versions are usually assigned to symbols using version scripts, -// but there's another way to assign versions to symbols. -// If a symbol name contains '@', the string after it is not -// actually a part of the symbol name but specifies a version. -// This function takes care of it. 
-template <class ELFT> void SymbolTable<ELFT>::scanSymbolVersions() { - if (Config->VersionDefinitions.empty()) +// This function processes version scripts by updating VersionId +// member of symbols. +template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() { + // Symbol themselves might know their versions because symbols + // can contain versions in the form of <name>@<version>. + // Let them parse their names. + if (!Config->VersionDefinitions.empty()) + for (Symbol *Sym : SymVector) + Sym->body()->parseSymbolVersion(); + + // Handle edge cases first. + if (!Config->VersionScriptGlobals.empty()) { + handleAnonymousVersion(); return; + } - int MaxVersionLen = getMaxVersionLen(); - - // Unfortunately there's no way other than iterating over all - // symbols to look for '@' characters in symbol names. - // So this is inherently slow. A good news is that we do this - // only when versions have been defined. - for (Symbol *Sym : SymVector) { - // Symbol versions for exported symbols are by nature - // only for defined global symbols. - SymbolBody *B = Sym->body(); - if (!B->isDefined()) - continue; - uint8_t Visibility = B->getVisibility(); - if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) - continue; + if (Config->VersionDefinitions.empty()) + return; - // Look for '@' in the symbol name. - StringRef Name; - uint16_t Version; - std::tie(Name, Version) = getSymbolVersion(B, MaxVersionLen); - if (Name.empty()) - continue; + // Now we have version definitions, so we need to set version ids to symbols. + // Each version definition has a glob pattern, and all symbols that match + // with the pattern get that version. - B->setName(Name); - Sym->VersionId = Version; - } + // First, we assign versions to exact matching symbols, + // i.e. version definitions not containing any glob meta-characters. 
+ for (SymbolVersion &Ver : Config->VersionScriptLocals) + assignExactVersion(Ver, VER_NDX_LOCAL, "local"); + for (VersionDefinition &V : Config->VersionDefinitions) + for (SymbolVersion &Ver : V.Globals) + assignExactVersion(Ver, V.Id, V.Name); + + // Next, we assign versions to fuzzy matching symbols, + // i.e. version definitions containing glob meta-characters. + // Note that because the last match takes precedence over previous matches, + // we iterate over the definitions in the reverse order. + for (SymbolVersion &Ver : Config->VersionScriptLocals) + assignWildcardVersion(Ver, VER_NDX_LOCAL); + for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions)) + for (SymbolVersion &Ver : V.Globals) + assignWildcardVersion(Ver, V.Id); } template class elf::SymbolTable<ELF32LE>; diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.h b/contrib/llvm/tools/lld/ELF/SymbolTable.h index 40415b645a44..1e5a335acc16 100644 --- a/contrib/llvm/tools/lld/ELF/SymbolTable.h +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.h @@ -12,16 +12,16 @@ #include "InputFiles.h" #include "LTO.h" +#include "Strings.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" namespace lld { namespace elf { class Lazy; -template <class ELFT> class OutputSectionBase; +class OutputSectionBase; struct Symbol; -typedef llvm::CachedHash<StringRef> SymName; - // SymbolTable is a bucket of all known symbols, including defined, // undefined, or lazy symbols (the last one is symbols in archive // files whose archive members are not yet loaded). 
@@ -39,40 +39,39 @@ template <class ELFT> class SymbolTable { typedef typename ELFT::uint uintX_t; public: - void addFile(std::unique_ptr<InputFile> File); - void addCombinedLtoObject(); - - llvm::ArrayRef<Symbol *> getSymbols() const { return SymVector; } + void addFile(InputFile *File); + void addCombinedLTOObject(); - const std::vector<std::unique_ptr<ObjectFile<ELFT>>> &getObjectFiles() const { - return ObjectFiles; - } - - const std::vector<std::unique_ptr<SharedFile<ELFT>>> &getSharedFiles() const { - return SharedFiles; - } + ArrayRef<Symbol *> getSymbols() const { return SymVector; } + ArrayRef<ObjectFile<ELFT> *> getObjectFiles() const { return ObjectFiles; } + ArrayRef<BinaryFile *> getBinaryFiles() const { return BinaryFiles; } + ArrayRef<SharedFile<ELFT> *> getSharedFiles() const { return SharedFiles; } DefinedRegular<ELFT> *addAbsolute(StringRef Name, - uint8_t Visibility = llvm::ELF::STV_HIDDEN); + uint8_t Visibility = llvm::ELF::STV_HIDDEN, + uint8_t Binding = llvm::ELF::STB_GLOBAL); DefinedRegular<ELFT> *addIgnored(StringRef Name, uint8_t Visibility = llvm::ELF::STV_HIDDEN); Symbol *addUndefined(StringRef Name); - Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type, bool CanOmitFromDynSym, InputFile *File); - - Symbol *addRegular(StringRef Name, const Elf_Sym &Sym, - InputSectionBase<ELFT> *Section); - Symbol *addRegular(StringRef Name, uint8_t Binding, uint8_t StOther); - Symbol *addSynthetic(StringRef N, OutputSectionBase<ELFT> *Section, - uintX_t Value); + Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding, + uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, + InputFile *File); + + Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type, + uintX_t Value, uintX_t Size, uint8_t Binding, + InputSectionBase<ELFT> *Section, InputFile *File); + + Symbol *addSynthetic(StringRef N, const OutputSectionBase *Section, + uintX_t Value, uint8_t StOther); + void addShared(SharedFile<ELFT> *F, 
StringRef Name, const Elf_Sym &Sym, const typename ELFT::Verdef *Verdef); void addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S); void addLazyObject(StringRef Name, LazyObjectFile &Obj); - Symbol *addBitcode(StringRef Name, bool IsWeak, uint8_t StOther, uint8_t Type, - bool CanOmitFromDynSym, BitcodeFile *File); + Symbol *addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther, + uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *File); Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t Binding, uint8_t StOther, uint8_t Type, @@ -80,28 +79,32 @@ public: void scanUndefinedFlags(); void scanShlibUndefined(); - void scanDynamicList(); void scanVersionScript(); - void scanSymbolVersions(); SymbolBody *find(StringRef Name); void trace(StringRef Name); void wrap(StringRef Name); + std::vector<InputSectionBase<ELFT> *> Sections; + private: - std::vector<SymbolBody *> findAll(StringRef Pattern); std::pair<Symbol *, bool> insert(StringRef Name); std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type, uint8_t Visibility, bool CanOmitFromDynSym, - bool IsUsedInRegularObj, InputFile *File); + InputFile *File); - std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile); - void reportDuplicate(SymbolBody *Existing, InputFile *NewFile); + std::vector<SymbolBody *> findByVersion(SymbolVersion Ver); + std::vector<SymbolBody *> findAllByVersion(SymbolVersion Ver); - std::map<std::string, SymbolBody *> getDemangledSyms(); + llvm::StringMap<std::vector<SymbolBody *>> &getDemangledSyms(); + void handleAnonymousVersion(); + void assignExactVersion(SymbolVersion Ver, uint16_t VersionId, + StringRef VersionName); + void assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId); struct SymIndex { + SymIndex(int Idx, bool Traced) : Idx(Idx), Traced(Traced) {} int Idx : 31; unsigned Traced : 1; }; @@ -113,26 +116,30 @@ private: // but a bit inefficient. 
// FIXME: Experiment with passing in a custom hashing or sorting the symbols // once symbol resolution is finished. - llvm::DenseMap<SymName, SymIndex> Symtab; + llvm::DenseMap<llvm::CachedHashStringRef, SymIndex> Symtab; std::vector<Symbol *> SymVector; - llvm::BumpPtrAllocator Alloc; // Comdat groups define "link once" sections. If two comdat groups have the // same name, only one of them is linked, and the other is ignored. This set // is used to uniquify them. - llvm::DenseSet<StringRef> ComdatGroups; + llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups; - // The symbol table owns all file objects. - std::vector<std::unique_ptr<ArchiveFile>> ArchiveFiles; - std::vector<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles; - std::vector<std::unique_ptr<LazyObjectFile>> LazyObjectFiles; - std::vector<std::unique_ptr<SharedFile<ELFT>>> SharedFiles; - std::vector<std::unique_ptr<BitcodeFile>> BitcodeFiles; + std::vector<ObjectFile<ELFT> *> ObjectFiles; + std::vector<SharedFile<ELFT> *> SharedFiles; + std::vector<BitcodeFile *> BitcodeFiles; + std::vector<BinaryFile *> BinaryFiles; // Set of .so files to not link the same shared object file more than once. llvm::DenseSet<StringRef> SoNames; - std::unique_ptr<BitcodeCompiler> Lto; + // A map from demangled symbol names to their symbol objects. + // This mapping is 1:N because two symbols with different versions + // can have the same name. We use this map to handle "extern C++ {}" + // directive in version scripts. + llvm::Optional<llvm::StringMap<std::vector<SymbolBody *>>> DemangledSyms; + + // For LTO. 
+ std::unique_ptr<BitcodeCompiler> LTO; }; template <class ELFT> struct Symtab { static SymbolTable<ELFT> *X; }; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.cpp b/contrib/llvm/tools/lld/ELF/Symbols.cpp index d6a605d11183..a2133f411c20 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.cpp +++ b/contrib/llvm/tools/lld/ELF/Symbols.cpp @@ -12,9 +12,14 @@ #include "InputFiles.h" #include "InputSection.h" #include "OutputSections.h" +#include "Strings.h" +#include "SyntheticSections.h" #include "Target.h" +#include "Writer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Path.h" +#include <cstring> using namespace llvm; using namespace llvm::object; @@ -30,27 +35,27 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body, switch (Body.kind()) { case SymbolBody::DefinedSyntheticKind: { - auto &D = cast<DefinedSynthetic<ELFT>>(Body); - const OutputSectionBase<ELFT> *Sec = D.Section; + auto &D = cast<DefinedSynthetic>(Body); + const OutputSectionBase *Sec = D.Section; if (!Sec) return D.Value; - if (D.Value == DefinedSynthetic<ELFT>::SectionEnd) - return Sec->getVA() + Sec->getSize(); - return Sec->getVA() + D.Value; + if (D.Value == uintX_t(-1)) + return Sec->Addr + Sec->Size; + return Sec->Addr + D.Value; } case SymbolBody::DefinedRegularKind: { auto &D = cast<DefinedRegular<ELFT>>(Body); - InputSectionBase<ELFT> *SC = D.Section; + InputSectionBase<ELFT> *IS = D.Section; // According to the ELF spec reference to a local symbol from outside // the group are not allowed. Unfortunately .eh_frame breaks that rule // and must be treated specially. For now we just replace the symbol with // 0. - if (SC == &InputSection<ELFT>::Discarded) + if (IS == &InputSection<ELFT>::Discarded) return 0; // This is an absolute symbol. 
- if (!SC) + if (!IS) return D.Value; uintX_t Offset = D.Value; @@ -58,20 +63,25 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body, Offset += Addend; Addend = 0; } - uintX_t VA = SC->OutSec->getVA() + SC->getOffset(Offset); - if (D.isTls()) + uintX_t VA = (IS->OutSec ? IS->OutSec->Addr : 0) + IS->getOffset(Offset); + if (D.isTls() && !Config->Relocatable) { + if (!Out<ELFT>::TlsPhdr) + fatal(toString(D.File) + + " has a STT_TLS symbol but doesn't have a PT_TLS section"); return VA - Out<ELFT>::TlsPhdr->p_vaddr; + } return VA; } case SymbolBody::DefinedCommonKind: - return Out<ELFT>::Bss->getVA() + cast<DefinedCommon>(Body).OffsetInBss; + return In<ELFT>::Common->OutSec->Addr + In<ELFT>::Common->OutSecOff + + cast<DefinedCommon>(Body).Offset; case SymbolBody::SharedKind: { auto &SS = cast<SharedSymbol<ELFT>>(Body); if (!SS.NeedsCopyOrPltAddr) return 0; if (SS.isFunc()) return Body.getPltVA<ELFT>(); - return Out<ELFT>::Bss->getVA() + SS.OffsetInBss; + return Out<ELFT>::Bss->Addr + SS.OffsetInBss; } case SymbolBody::UndefinedKind: return 0; @@ -79,32 +89,15 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body, case SymbolBody::LazyObjectKind: assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer"); return 0; - case SymbolBody::DefinedBitcodeKind: - llvm_unreachable("should have been replaced"); } llvm_unreachable("invalid symbol kind"); } -SymbolBody::SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther, +SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type) - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true), - IsInGlobalMipsGot(false), Type(Type), StOther(StOther), - NameOffset(NameOffset) {} - -SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false), - IsInGlobalMipsGot(false), Type(Type), StOther(StOther), - Name({Name.data(), Name.size()}) {} - -StringRef SymbolBody::getName() const { - 
assert(!isLocal()); - return StringRef(Name.S, Name.Len); -} - -void SymbolBody::setName(StringRef S) { - Name.S = S.data(); - Name.Len = S.size(); -} + : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal), + IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false), + IsInIgot(false), Type(Type), StOther(StOther), Name(Name) {} // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. @@ -151,7 +144,7 @@ typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const { } template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const { - return Out<ELFT>::Got->getVA() + getGotOffset<ELFT>(); + return In<ELFT>::Got->getVA() + getGotOffset<ELFT>(); } template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const { @@ -159,7 +152,9 @@ template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const { } template <class ELFT> typename ELFT::uint SymbolBody::getGotPltVA() const { - return Out<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>(); + if (this->IsInIgot) + return In<ELFT>::IgotPlt->getVA() + getGotPltOffset<ELFT>(); + return In<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>(); } template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const { @@ -167,7 +162,9 @@ template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const { } template <class ELFT> typename ELFT::uint SymbolBody::getPltVA() const { - return Out<ELFT>::Plt->getVA() + Target->PltHeaderSize + + if (this->IsInIplt) + return In<ELFT>::Iplt->getVA() + PltIndex * Target->PltEntrySize; + return In<ELFT>::Plt->getVA() + Target->PltHeaderSize + PltIndex * Target->PltEntrySize; } @@ -189,48 +186,67 @@ template <class ELFT> typename ELFT::uint SymbolBody::getSize() const { return 0; } -Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) - : SymbolBody(K, Name, StOther, Type) {} - -Defined::Defined(Kind K, uint32_t NameOffset, uint8_t 
StOther, uint8_t Type) - : SymbolBody(K, NameOffset, StOther, Type) {} +// If a symbol name contains '@', the characters after that is +// a symbol version name. This function parses that. +void SymbolBody::parseSymbolVersion() { + StringRef S = getName(); + size_t Pos = S.find('@'); + if (Pos == 0 || Pos == StringRef::npos) + return; + StringRef Verstr = S.substr(Pos + 1); + if (Verstr.empty()) + return; + + // Truncate the symbol name so that it doesn't include the version string. + Name = {S.data(), Pos}; + + // '@@' in a symbol name means the default version. + // It is usually the most recent one. + bool IsDefault = (Verstr[0] == '@'); + if (IsDefault) + Verstr = Verstr.substr(1); + + for (VersionDefinition &Ver : Config->VersionDefinitions) { + if (Ver.Name != Verstr) + continue; + + if (IsDefault) + symbol()->VersionId = Ver.Id; + else + symbol()->VersionId = Ver.Id | VERSYM_HIDDEN; + return; + } -DefinedBitcode::DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type, - BitcodeFile *F) - : Defined(DefinedBitcodeKind, Name, StOther, Type) { - this->File = F; + // It is an error if the specified version is not defined. 
+ error(toString(File) + ": symbol " + S + " has undefined version " + Verstr); } -bool DefinedBitcode::classof(const SymbolBody *S) { - return S->kind() == DefinedBitcodeKind; -} +Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type) + : SymbolBody(K, Name, IsLocal, StOther, Type) {} -Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type, - InputFile *File) - : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) { - this->File = File; +template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const { + if (!Section || !isFunc()) + return false; + return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC || + (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); } -Undefined::Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type, - InputFile *File) - : SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) { +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type, InputFile *File) + : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { this->File = File; } -template <typename ELFT> -DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value, - OutputSectionBase<ELFT> *Section) - : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */), - Value(Value), Section(Section) {} - -DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, +DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment, uint8_t StOther, uint8_t Type, InputFile *File) - : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type), + : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther, + Type), Alignment(Alignment), Size(Size) { this->File = File; } -std::unique_ptr<InputFile> Lazy::fetch() { +InputFile *Lazy::fetch() { if (auto *S = dyn_cast<LazyArchive>(this)) return S->fetch(); return cast<LazyObject>(this)->fetch(); @@ -247,20 +263,20 @@ LazyObject::LazyObject(StringRef Name, 
LazyObjectFile &File, uint8_t Type) this->File = &File; } -std::unique_ptr<InputFile> LazyArchive::fetch() { - MemoryBufferRef MBRef = file()->getMember(&Sym); +InputFile *LazyArchive::fetch() { + std::pair<MemoryBufferRef, uint64_t> MBInfo = file()->getMember(&Sym); // getMember returns an empty buffer if the member was already // read from the library. - if (MBRef.getBuffer().empty()) - return std::unique_ptr<InputFile>(nullptr); - return createObjectFile(MBRef, file()->getName()); + if (MBInfo.first.getBuffer().empty()) + return nullptr; + return createObjectFile(MBInfo.first, file()->getName(), MBInfo.second); } -std::unique_ptr<InputFile> LazyObject::fetch() { +InputFile *LazyObject::fetch() { MemoryBufferRef MBRef = file()->getBuffer(); if (MBRef.getBuffer().empty()) - return std::unique_ptr<InputFile>(nullptr); + return nullptr; return createObjectFile(MBRef); } @@ -274,7 +290,7 @@ bool Symbol::includeInDynsym() const { // Print out a log message for --trace-symbol. void elf::printTraceSymbol(Symbol *Sym) { SymbolBody *B = Sym->body(); - outs() << getFilename(B->File); + outs() << toString(B->File); if (B->isUndefined()) outs() << ": reference to "; @@ -285,6 +301,14 @@ void elf::printTraceSymbol(Symbol *Sym) { outs() << B->getName() << "\n"; } +// Returns a symbol for an error message. 
+std::string elf::toString(const SymbolBody &B) { + if (Config->Demangle) + if (Optional<std::string> S = demangle(B.getName())) + return *S; + return B.getName(); +} + template bool SymbolBody::hasThunk<ELF32LE>() const; template bool SymbolBody::hasThunk<ELF32BE>() const; template bool SymbolBody::hasThunk<ELF64LE>() const; @@ -330,7 +354,7 @@ template uint32_t SymbolBody::template getSize<ELF32BE>() const; template uint64_t SymbolBody::template getSize<ELF64LE>() const; template uint64_t SymbolBody::template getSize<ELF64BE>() const; -template class elf::DefinedSynthetic<ELF32LE>; -template class elf::DefinedSynthetic<ELF32BE>; -template class elf::DefinedSynthetic<ELF64LE>; -template class elf::DefinedSynthetic<ELF64BE>; +template class elf::DefinedRegular<ELF32LE>; +template class elf::DefinedRegular<ELF32BE>; +template class elf::DefinedRegular<ELF64LE>; +template class elf::DefinedRegular<ELF64BE>; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.h b/contrib/llvm/tools/lld/ELF/Symbols.h index aa9a87d3b4f7..c95241a5293e 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.h +++ b/contrib/llvm/tools/lld/ELF/Symbols.h @@ -16,11 +16,11 @@ #define LLD_ELF_SYMBOLS_H #include "InputSection.h" +#include "Strings.h" #include "lld/Core/LLVM.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" -#include "llvm/Support/AlignOf.h" namespace lld { namespace elf { @@ -29,10 +29,9 @@ class ArchiveFile; class BitcodeFile; class InputFile; class LazyObjectFile; -class SymbolBody; template <class ELFT> class ObjectFile; template <class ELFT> class OutputSection; -template <class ELFT> class OutputSectionBase; +class OutputSectionBase; template <class ELFT> class SharedFile; struct Symbol; @@ -45,7 +44,6 @@ public: DefinedRegularKind = DefinedFirst, SharedKind, DefinedCommonKind, - DefinedBitcodeKind, DefinedSyntheticKind, DefinedLast = DefinedSyntheticKind, UndefinedKind, @@ -71,22 +69,10 @@ public: bool isShared() const { return SymbolKind == SharedKind; } bool isLocal() 
const { return IsLocal; } bool isPreemptible() const; - - StringRef getName() const; - void setName(StringRef S); - - uint32_t getNameOffset() const { - assert(isLocal()); - return NameOffset; - } - + StringRef getName() const { return Name; } uint8_t getVisibility() const { return StOther & 0x3; } + void parseSymbolVersion(); - unsigned DynsymIndex = 0; - uint32_t GotIndex = -1; - uint32_t GotPltIndex = -1; - uint32_t PltIndex = -1; - uint32_t GlobalDynIndex = -1; bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } template <class ELFT> bool hasThunk() const; @@ -105,10 +91,15 @@ public: // The file from which this symbol was created. InputFile *File = nullptr; -protected: - SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); + uint32_t DynsymIndex = 0; + uint32_t GotIndex = -1; + uint32_t GotPltIndex = -1; + uint32_t PltIndex = -1; + uint32_t GlobalDynIndex = -1; - SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type); +protected: + SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type); const unsigned SymbolKind : 8; @@ -123,6 +114,15 @@ public: // True if this symbol has an entry in the global part of MIPS GOT. unsigned IsInGlobalMipsGot : 1; + // True if this symbol is referenced by 32-bit GOT relocations. + unsigned Is32BitMipsGot : 1; + + // True if this symbol is in the Iplt sub-section of the Plt. + unsigned IsInIplt : 1; + + // True if this symbol is in the Igot sub-section of the .got.plt or .got. + unsigned IsInIgot : 1; + // The following fields have the same meaning as the ELF symbol attributes. uint8_t Type; // symbol type uint8_t StOther; // st_other field value @@ -142,32 +142,16 @@ public: bool isFile() const { return Type == llvm::ELF::STT_FILE; } protected: - struct Str { - const char *S; - size_t Len; - }; - union { - Str Name; - uint32_t NameOffset; - }; + StringRefZ Name; }; // The base class for any defined symbols. 
class Defined : public SymbolBody { public: - Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); - Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type); + Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type); static bool classof(const SymbolBody *S) { return S->isDefined(); } }; -// The defined symbol in LLVM bitcode files. -class DefinedBitcode : public Defined { -public: - DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type, BitcodeFile *F); - static bool classof(const SymbolBody *S); - BitcodeFile *file() { return (BitcodeFile *)this->File; } -}; - class DefinedCommon : public Defined { public: DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther, @@ -179,7 +163,7 @@ public: // The output offset of this common symbol in the output bss. Computed by the // writer. - uint64_t OffsetInBss; + uint64_t Offset; // The maximum alignment we have seen for this symbol. uint64_t Alignment; @@ -193,30 +177,17 @@ template <class ELFT> class DefinedRegular : public Defined { typedef typename ELFT::uint uintX_t; public: - DefinedRegular(StringRef Name, const Elf_Sym &Sym, - InputSectionBase<ELFT> *Section) - : Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other, - Sym.getType()), - Value(Sym.st_value), Size(Sym.st_size), + DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, + uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section, + InputFile *File) + : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type), + Value(Value), Size(Size), Section(Section ? Section->Repl : NullInputSection) { - if (Section) - this->File = Section->getFile(); + this->File = File; } - DefinedRegular(const Elf_Sym &Sym, InputSectionBase<ELFT> *Section) - : Defined(SymbolBody::DefinedRegularKind, Sym.st_name, Sym.st_other, - Sym.getType()), - Value(Sym.st_value), Size(Sym.st_size), - Section(Section ? 
Section->Repl : NullInputSection) { - assert(isLocal()); - if (Section) - this->File = Section->getFile(); - } - - DefinedRegular(StringRef Name, uint8_t StOther) - : Defined(SymbolBody::DefinedRegularKind, Name, StOther, - llvm::ELF::STT_NOTYPE), - Value(0), Size(0), Section(NullInputSection) {} + // Return true if the symbol is a PIC function. + bool isMipsPIC() const; static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedRegularKind; @@ -249,28 +220,26 @@ InputSectionBase<ELFT> *DefinedRegular<ELFT>::NullInputSection; // don't belong to any input files or sections. Thus, its constructor // takes an output section to calculate output VA, etc. // If Section is null, this symbol is relative to the image base. -template <class ELFT> class DefinedSynthetic : public Defined { +class DefinedSynthetic : public Defined { public: - typedef typename ELFT::uint uintX_t; - DefinedSynthetic(StringRef N, uintX_t Value, - OutputSectionBase<ELFT> *Section); + DefinedSynthetic(StringRef Name, uint64_t Value, + const OutputSectionBase *Section) + : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false, + llvm::ELF::STV_HIDDEN, 0 /* Type */), + Value(Value), Section(Section) {} static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedSyntheticKind; } - // Special value designates that the symbol 'points' - // to the end of the section. 
- static const uintX_t SectionEnd = uintX_t(-1); - - uintX_t Value; - const OutputSectionBase<ELFT> *Section; + uint64_t Value; + const OutputSectionBase *Section; }; class Undefined : public SymbolBody { public: - Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F); - Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type, InputFile *F); + Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, + InputFile *F); static bool classof(const SymbolBody *S) { return S->kind() == UndefinedKind; @@ -291,7 +260,8 @@ public: SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym, const Elf_Verdef *Verdef) - : Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()), + : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other, + Sym.getType()), Sym(Sym), Verdef(Verdef) { // IFuncs defined in DSOs are treated as functions by the static linker. if (isGnuIFunc()) @@ -326,11 +296,11 @@ public: // Returns an object file for this symbol, or a nullptr if the file // was already returned. - std::unique_ptr<InputFile> fetch(); + InputFile *fetch(); protected: Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type) - : SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {} + : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {} }; // LazyArchive symbols represents symbols in archive files. @@ -344,7 +314,7 @@ public: } ArchiveFile *file() { return (ArchiveFile *)this->File; } - std::unique_ptr<InputFile> fetch(); + InputFile *fetch(); private: const llvm::object::Archive::Symbol Sym; @@ -361,12 +331,15 @@ public: } LazyObjectFile *file() { return (LazyObjectFile *)this->File; } - std::unique_ptr<InputFile> fetch(); + InputFile *fetch(); }; // Some linker-generated symbols need to be created as // DefinedRegular symbols. template <class ELFT> struct ElfSym { + // The content for __ehdr_start symbol. + static DefinedRegular<ELFT> *EhdrStart; + // The content for _etext and etext symbols. 
static DefinedRegular<ELFT> *Etext; static DefinedRegular<ELFT> *Etext2; @@ -379,17 +352,22 @@ template <class ELFT> struct ElfSym { static DefinedRegular<ELFT> *End; static DefinedRegular<ELFT> *End2; - // The content for _gp_disp symbol for MIPS target. - static SymbolBody *MipsGpDisp; + // The content for _gp_disp/__gnu_local_gp symbols for MIPS target. + static DefinedRegular<ELFT> *MipsGpDisp; + static DefinedRegular<ELFT> *MipsLocalGp; + static DefinedRegular<ELFT> *MipsGp; }; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::EhdrStart; template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext; template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext2; template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata; template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata2; template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End; template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End2; -template <class ELFT> SymbolBody *ElfSym<ELFT>::MipsGpDisp; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGpDisp; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsLocalGp; +template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGp; // A real symbol object, SymbolBody, is usually stored within a Symbol. There's // always one Symbol for each symbol name. The resolver updates the SymbolBody @@ -425,6 +403,9 @@ struct Symbol { // True if this symbol is specified by --trace-symbol option. unsigned Traced : 1; + // This symbol version was found in a version script. + unsigned InVersionScript : 1; + bool includeInDynsym() const; bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; } @@ -434,9 +415,8 @@ struct Symbol { // assume that the size and alignment of ELF64LE symbols is sufficient for any // ELFT, and we verify this with the static_asserts in replaceBody. 
llvm::AlignedCharArrayUnion< - DefinedBitcode, DefinedCommon, DefinedRegular<llvm::object::ELF64LE>, - DefinedSynthetic<llvm::object::ELF64LE>, Undefined, - SharedSymbol<llvm::object::ELF64LE>, LazyArchive, LazyObject> + DefinedCommon, DefinedRegular<llvm::object::ELF64LE>, DefinedSynthetic, + Undefined, SharedSymbol<llvm::object::ELF64LE>, LazyArchive, LazyObject> Body; SymbolBody *body() { return reinterpret_cast<SymbolBody *>(Body.buffer); } @@ -448,8 +428,7 @@ void printTraceSymbol(Symbol *Sym); template <typename T, typename... ArgT> void replaceBody(Symbol *S, ArgT &&... Arg) { static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); - static_assert(llvm::AlignOf<T>::Alignment <= - llvm::AlignOf<decltype(S->Body)>::Alignment, + static_assert(alignof(T) <= alignof(decltype(S->Body)), "Body not aligned enough"); assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr && "Not a SymbolBody"); @@ -468,6 +447,8 @@ inline Symbol *SymbolBody::symbol() { offsetof(Symbol, Body)); } +std::string toString(const SymbolBody &B); + } // namespace elf } // namespace lld diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp new file mode 100644 index 000000000000..3c8a439ba308 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp @@ -0,0 +1,1990 @@ +//===- SyntheticSections.cpp ----------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains linker-synthesized sections. Currently, +// synthetic sections are created either output sections or input sections, +// but we are rewriting code so that all synthetic sections are created as +// input sections. 
+// +//===----------------------------------------------------------------------===// + +#include "SyntheticSections.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Strings.h" +#include "SymbolTable.h" +#include "Target.h" +#include "Threads.h" +#include "Writer.h" +#include "lld/Config/Version.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/xxhash.h" +#include <cstdlib> + +using namespace llvm; +using namespace llvm::dwarf; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +template <class ELFT> static std::vector<DefinedCommon *> getCommonSymbols() { + std::vector<DefinedCommon *> V; + for (Symbol *S : Symtab<ELFT>::X->getSymbols()) + if (auto *B = dyn_cast<DefinedCommon>(S->body())) + V.push_back(B); + return V; +} + +// Find all common symbols and allocate space for them. +template <class ELFT> InputSection<ELFT> *elf::createCommonSection() { + auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, + ArrayRef<uint8_t>(), "COMMON"); + Ret->Live = true; + + // Sort the common symbols by alignment as an heuristic to pack them better. + std::vector<DefinedCommon *> Syms = getCommonSymbols<ELFT>(); + std::stable_sort(Syms.begin(), Syms.end(), + [](const DefinedCommon *A, const DefinedCommon *B) { + return A->Alignment > B->Alignment; + }); + + // Assign offsets to symbols. + size_t Size = 0; + size_t Alignment = 1; + for (DefinedCommon *Sym : Syms) { + Alignment = std::max<size_t>(Alignment, Sym->Alignment); + Size = alignTo(Size, Sym->Alignment); + + // Compute symbol offset relative to beginning of input section. 
+ Sym->Offset = Size; + Size += Sym->Size; + } + Ret->Alignment = Alignment; + Ret->Data = makeArrayRef<uint8_t>(nullptr, Size); + return Ret; +} + +// Returns an LLD version string. +static ArrayRef<uint8_t> getVersion() { + // Check LLD_VERSION first for ease of testing. + // You can get consitent output by using the environment variable. + // This is only for testing. + StringRef S = getenv("LLD_VERSION"); + if (S.empty()) + S = Saver.save(Twine("Linker: ") + getLLDVersion()); + + // +1 to include the terminating '\0'. + return {(const uint8_t *)S.data(), S.size() + 1}; +} + +// Creates a .comment section containing LLD version info. +// With this feature, you can identify LLD-generated binaries easily +// by "objdump -s -j .comment <file>". +// The returned object is a mergeable string section. +template <class ELFT> MergeInputSection<ELFT> *elf::createCommentSection() { + typename ELFT::Shdr Hdr = {}; + Hdr.sh_flags = SHF_MERGE | SHF_STRINGS; + Hdr.sh_type = SHT_PROGBITS; + Hdr.sh_entsize = 1; + Hdr.sh_addralign = 1; + + auto *Ret = make<MergeInputSection<ELFT>>(/*file=*/nullptr, &Hdr, ".comment"); + Ret->Data = getVersion(); + Ret->splitIntoPieces(); + return Ret; +} + +// .MIPS.abiflags section. 
+template <class ELFT> +MipsAbiFlagsSection<ELFT>::MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), + Flags(Flags) {} + +template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *Buf) { + memcpy(Buf, &Flags, sizeof(Flags)); +} + +template <class ELFT> +MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { + Elf_Mips_ABIFlags Flags = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_ABIFLAGS) + continue; + Sec->Live = false; + Create = true; + + std::string Filename = toString(Sec->getFile()); + const size_t Size = Sec->Data.size(); + // Older version of BFD (such as the default FreeBSD linker) concatenate + // .MIPS.abiflags instead of merging. To allow for this case (or potential + // zero padding) we ignore everything after the first Elf_Mips_ABIFlags + if (Size < sizeof(Elf_Mips_ABIFlags)) { + error(Filename + ": invalid size of .MIPS.abiflags section: got " + + Twine(Size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags))); + return nullptr; + } + auto *S = reinterpret_cast<const Elf_Mips_ABIFlags *>(Sec->Data.data()); + if (S->version != 0) { + error(Filename + ": unexpected .MIPS.abiflags version " + + Twine(S->version)); + return nullptr; + } + + // LLD checks ISA compatibility in getMipsEFlags(). Here we just + // select the highest number of ISA/Rev/Ext. 
+ Flags.isa_level = std::max(Flags.isa_level, S->isa_level); + Flags.isa_rev = std::max(Flags.isa_rev, S->isa_rev); + Flags.isa_ext = std::max(Flags.isa_ext, S->isa_ext); + Flags.gpr_size = std::max(Flags.gpr_size, S->gpr_size); + Flags.cpr1_size = std::max(Flags.cpr1_size, S->cpr1_size); + Flags.cpr2_size = std::max(Flags.cpr2_size, S->cpr2_size); + Flags.ases |= S->ases; + Flags.flags1 |= S->flags1; + Flags.flags2 |= S->flags2; + Flags.fp_abi = elf::getMipsFpAbiFlag(Flags.fp_abi, S->fp_abi, Filename); + }; + + if (Create) + return make<MipsAbiFlagsSection<ELFT>>(Flags); + return nullptr; +} + +// .MIPS.options section. +template <class ELFT> +MipsOptionsSection<ELFT>::MipsOptionsSection(Elf_Mips_RegInfo Reginfo) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), + Reginfo(Reginfo) {} + +template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *Buf) { + auto *Options = reinterpret_cast<Elf_Mips_Options *>(Buf); + Options->kind = ODK_REGINFO; + Options->size = getSize(); + + if (!Config->Relocatable) + Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp(); + memcpy(Buf + sizeof(Elf_Mips_Options), &Reginfo, sizeof(Reginfo)); +} + +template <class ELFT> +MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { + // N64 ABI only. 
+ if (!ELFT::Is64Bits) + return nullptr; + + Elf_Mips_RegInfo Reginfo = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_OPTIONS) + continue; + Sec->Live = false; + Create = true; + + std::string Filename = toString(Sec->getFile()); + ArrayRef<uint8_t> D = Sec->Data; + + while (!D.empty()) { + if (D.size() < sizeof(Elf_Mips_Options)) { + error(Filename + ": invalid size of .MIPS.options section"); + break; + } + + auto *Opt = reinterpret_cast<const Elf_Mips_Options *>(D.data()); + if (Opt->kind == ODK_REGINFO) { + if (Config->Relocatable && Opt->getRegInfo().ri_gp_value) + error(Filename + ": unsupported non-zero ri_gp_value"); + Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask; + Sec->getFile()->MipsGp0 = Opt->getRegInfo().ri_gp_value; + break; + } + + if (!Opt->size) + fatal(Filename + ": zero option descriptor size"); + D = D.slice(Opt->size); + } + }; + + if (Create) + return make<MipsOptionsSection<ELFT>>(Reginfo); + return nullptr; +} + +// MIPS .reginfo section. +template <class ELFT> +MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo Reginfo) + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), + Reginfo(Reginfo) {} + +template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *Buf) { + if (!Config->Relocatable) + Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp(); + memcpy(Buf, &Reginfo, sizeof(Reginfo)); +} + +template <class ELFT> +MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { + // Section should be alive for O32 and N32 ABIs only. 
+ if (ELFT::Is64Bits) + return nullptr; + + Elf_Mips_RegInfo Reginfo = {}; + bool Create = false; + + for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + if (!Sec->Live || Sec->Type != SHT_MIPS_REGINFO) + continue; + Sec->Live = false; + Create = true; + + if (Sec->Data.size() != sizeof(Elf_Mips_RegInfo)) { + error(toString(Sec->getFile()) + ": invalid size of .reginfo section"); + return nullptr; + } + auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->Data.data()); + if (Config->Relocatable && R->ri_gp_value) + error(toString(Sec->getFile()) + ": unsupported non-zero ri_gp_value"); + + Reginfo.ri_gprmask |= R->ri_gprmask; + Sec->getFile()->MipsGp0 = R->ri_gp_value; + }; + + if (Create) + return make<MipsReginfoSection<ELFT>>(Reginfo); + return nullptr; +} + +template <class ELFT> InputSection<ELFT> *elf::createInterpSection() { + auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 1, + ArrayRef<uint8_t>(), ".interp"); + Ret->Live = true; + + // StringSaver guarantees that the returned string ends with '\0'. 
+ StringRef S = Saver.save(Config->DynamicLinker); + Ret->Data = {(const uint8_t *)S.data(), S.size() + 1}; + return Ret; +} + +static size_t getHashSize() { + switch (Config->BuildId) { + case BuildIdKind::Fast: + return 8; + case BuildIdKind::Md5: + case BuildIdKind::Uuid: + return 16; + case BuildIdKind::Sha1: + return 20; + case BuildIdKind::Hexstring: + return Config->BuildIdVector.size(); + default: + llvm_unreachable("unknown BuildIdKind"); + } +} + +template <class ELFT> +BuildIdSection<ELFT>::BuildIdSection() + : SyntheticSection<ELFT>(SHF_ALLOC, SHT_NOTE, 1, ".note.gnu.build-id"), + HashSize(getHashSize()) {} + +template <class ELFT> void BuildIdSection<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + write32<E>(Buf, 4); // Name size + write32<E>(Buf + 4, HashSize); // Content size + write32<E>(Buf + 8, NT_GNU_BUILD_ID); // Type + memcpy(Buf + 12, "GNU", 4); // Name string + HashBuf = Buf + 16; +} + +// Split one uint8 array into small pieces of uint8 arrays. +static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr, + size_t ChunkSize) { + std::vector<ArrayRef<uint8_t>> Ret; + while (Arr.size() > ChunkSize) { + Ret.push_back(Arr.take_front(ChunkSize)); + Arr = Arr.drop_front(ChunkSize); + } + if (!Arr.empty()) + Ret.push_back(Arr); + return Ret; +} + +// Computes a hash value of Data using a given hash function. +// In order to utilize multiple cores, we first split data into 1MB +// chunks, compute a hash for each chunk, and then compute a hash value +// of the hash values. +template <class ELFT> +void BuildIdSection<ELFT>::computeHash( + llvm::ArrayRef<uint8_t> Data, + std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) { + std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024); + std::vector<uint8_t> Hashes(Chunks.size() * HashSize); + + // Compute hash values. 
+ forLoop(0, Chunks.size(), + [&](size_t I) { HashFn(Hashes.data() + I * HashSize, Chunks[I]); }); + + // Write to the final output buffer. + HashFn(HashBuf, Hashes); +} + +template <class ELFT> +void BuildIdSection<ELFT>::writeBuildId(ArrayRef<uint8_t> Buf) { + switch (Config->BuildId) { + case BuildIdKind::Fast: + computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { + write64le(Dest, xxHash64(toStringRef(Arr))); + }); + break; + case BuildIdKind::Md5: + computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { + memcpy(Dest, MD5::hash(Arr).data(), 16); + }); + break; + case BuildIdKind::Sha1: + computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { + memcpy(Dest, SHA1::hash(Arr).data(), 20); + }); + break; + case BuildIdKind::Uuid: + if (getRandomBytes(HashBuf, HashSize)) + error("entropy source failure"); + break; + case BuildIdKind::Hexstring: + memcpy(HashBuf, Config->BuildIdVector.data(), Config->BuildIdVector.size()); + break; + default: + llvm_unreachable("unknown BuildIdKind"); + } +} + +template <class ELFT> +GotSection<ELFT>::GotSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotEntrySize, ".got") {} + +template <class ELFT> void GotSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.GotIndex = NumEntries; + ++NumEntries; +} + +template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { + if (Sym.GlobalDynIndex != -1U) + return false; + Sym.GlobalDynIndex = NumEntries; + // Global Dynamic TLS entries take two GOT slots. + NumEntries += 2; + return true; +} + +// Reserves TLS entries for a TLS module ID and a TLS block offset. +// In total it takes two GOT slots. 
+template <class ELFT> bool GotSection<ELFT>::addTlsIndex() { + if (TlsIndexOff != uint32_t(-1)) + return false; + TlsIndexOff = NumEntries * sizeof(uintX_t); + NumEntries += 2; + return true; +} + +template <class ELFT> +typename GotSection<ELFT>::uintX_t +GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const { + return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t); +} + +template <class ELFT> +typename GotSection<ELFT>::uintX_t +GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { + return B.GlobalDynIndex * sizeof(uintX_t); +} + +template <class ELFT> void GotSection<ELFT>::finalize() { + Size = NumEntries * sizeof(uintX_t); +} + +template <class ELFT> bool GotSection<ELFT>::empty() const { + // If we have a relocation that is relative to GOT (such as GOTOFFREL), + // we need to emit a GOT even if it's empty. + return NumEntries == 0 && !HasGotOffRel; +} + +template <class ELFT> void GotSection<ELFT>::writeTo(uint8_t *Buf) { + this->relocate(Buf, Buf + Size); +} + +template <class ELFT> +MipsGotSection<ELFT>::MipsGotSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, + SHT_PROGBITS, Target->GotEntrySize, ".got") {} + +template <class ELFT> +void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend, + RelExpr Expr) { + // For "true" local symbols which can be referenced from the same module + // only compiler creates two instructions for address loading: + // + // lw $8, 0($gp) # R_MIPS_GOT16 + // addi $8, $8, 0 # R_MIPS_LO16 + // + // The first instruction loads high 16 bits of the symbol address while + // the second adds an offset. That allows to reduce number of required + // GOT entries because only one global offset table entry is necessary + // for every 64 KBytes of local data. So for local symbols we need to + // allocate number of GOT entries to hold all required "page" addresses. + // + // All global symbols (hidden and regular) considered by compiler uniformly. 
+ // It always generates a single `lw` instruction and R_MIPS_GOT16 relocation + // to load address of the symbol. So for each such symbol we need to + // allocate dedicated GOT entry to store its address. + // + // If a symbol is preemptible we need help of dynamic linker to get its + // final address. The corresponding GOT entries are allocated in the + // "global" part of GOT. Entries for non preemptible global symbol allocated + // in the "local" part of GOT. + // + // See "Global Offset Table" in Chapter 5: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Expr == R_MIPS_GOT_LOCAL_PAGE) { + // At this point we do not know final symbol value so to reduce number + // of allocated GOT entries do the following trick. Save all output + // sections referenced by GOT relocations. Then later in the `finalize` + // method calculate number of "pages" required to cover all saved output + // section and allocate appropriate number of GOT entries. + PageIndexMap.insert({cast<DefinedRegular<ELFT>>(&Sym)->Section->OutSec, 0}); + return; + } + if (Sym.isTls()) { + // GOT entries created for MIPS TLS relocations behave like + // almost GOT entries from other ABIs. They go to the end + // of the global offset table. + Sym.GotIndex = TlsEntries.size(); + TlsEntries.push_back(&Sym); + return; + } + auto AddEntry = [&](SymbolBody &S, uintX_t A, GotEntries &Items) { + if (S.isInGot() && !A) + return; + size_t NewIndex = Items.size(); + if (!EntryIndexMap.insert({{&S, A}, NewIndex}).second) + return; + Items.emplace_back(&S, A); + if (!A) + S.GotIndex = NewIndex; + }; + if (Sym.isPreemptible()) { + // Ignore addends for preemptible symbols. They got single GOT entry anyway. + AddEntry(Sym, 0, GlobalEntries); + Sym.IsInGlobalMipsGot = true; + } else if (Expr == R_MIPS_GOT_OFF32) { + AddEntry(Sym, Addend, LocalEntries32); + Sym.Is32BitMipsGot = true; + } else { + // Hold local GOT entries accessed via a 16-bit index separately. 
+ // That allows to write them in the beginning of the GOT and keep + // their indexes as less as possible to escape relocation's overflow. + AddEntry(Sym, Addend, LocalEntries); + } +} + +template <class ELFT> +bool MipsGotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { + if (Sym.GlobalDynIndex != -1U) + return false; + Sym.GlobalDynIndex = TlsEntries.size(); + // Global Dynamic TLS entries take two GOT slots. + TlsEntries.push_back(nullptr); + TlsEntries.push_back(&Sym); + return true; +} + +// Reserves TLS entries for a TLS module ID and a TLS block offset. +// In total it takes two GOT slots. +template <class ELFT> bool MipsGotSection<ELFT>::addTlsIndex() { + if (TlsIndexOff != uint32_t(-1)) + return false; + TlsIndexOff = TlsEntries.size() * sizeof(uintX_t); + TlsEntries.push_back(nullptr); + TlsEntries.push_back(nullptr); + return true; +} + +static uint64_t getMipsPageAddr(uint64_t Addr) { + return (Addr + 0x8000) & ~0xffff; +} + +static uint64_t getMipsPageCount(uint64_t Size) { + return (Size + 0xfffe) / 0xffff + 1; +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getPageEntryOffset(const SymbolBody &B, + uintX_t Addend) const { + const OutputSectionBase *OutSec = + cast<DefinedRegular<ELFT>>(&B)->Section->OutSec; + uintX_t SecAddr = getMipsPageAddr(OutSec->Addr); + uintX_t SymAddr = getMipsPageAddr(B.getVA<ELFT>(Addend)); + uintX_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff; + assert(Index < PageEntriesNum); + return (HeaderEntriesNum + Index) * sizeof(uintX_t); +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getBodyEntryOffset(const SymbolBody &B, + uintX_t Addend) const { + // Calculate offset of the GOT entries block: TLS, global, local. 
+ uintX_t Index = HeaderEntriesNum + PageEntriesNum; + if (B.isTls()) + Index += LocalEntries.size() + LocalEntries32.size() + GlobalEntries.size(); + else if (B.IsInGlobalMipsGot) + Index += LocalEntries.size() + LocalEntries32.size(); + else if (B.Is32BitMipsGot) + Index += LocalEntries.size(); + // Calculate offset of the GOT entry in the block. + if (B.isInGot()) + Index += B.GotIndex; + else { + auto It = EntryIndexMap.find({&B, Addend}); + assert(It != EntryIndexMap.end()); + Index += It->second; + } + return Index * sizeof(uintX_t); +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getTlsOffset() const { + return (getLocalEntriesNum() + GlobalEntries.size()) * sizeof(uintX_t); +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t +MipsGotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { + return B.GlobalDynIndex * sizeof(uintX_t); +} + +template <class ELFT> +const SymbolBody *MipsGotSection<ELFT>::getFirstGlobalEntry() const { + return GlobalEntries.empty() ? nullptr : GlobalEntries.front().first; +} + +template <class ELFT> +unsigned MipsGotSection<ELFT>::getLocalEntriesNum() const { + return HeaderEntriesNum + PageEntriesNum + LocalEntries.size() + + LocalEntries32.size(); +} + +template <class ELFT> void MipsGotSection<ELFT>::finalize() { + PageEntriesNum = 0; + for (std::pair<const OutputSectionBase *, size_t> &P : PageIndexMap) { + // For each output section referenced by GOT page relocations calculate + // and save into PageIndexMap an upper bound of MIPS GOT entries required + // to store page addresses of local symbols. We assume the worst case - + // each 64kb page of the output section has at least one GOT relocation + // against it. And take in account the case when the section intersects + // page boundaries. 
+ P.second = PageEntriesNum; + PageEntriesNum += getMipsPageCount(P.first->Size); + } + Size = (getLocalEntriesNum() + GlobalEntries.size() + TlsEntries.size()) * + sizeof(uintX_t); +} + +template <class ELFT> bool MipsGotSection<ELFT>::empty() const { + // We add the .got section to the result for dynamic MIPS target because + // its address and properties are mentioned in the .dynamic section. + return Config->Relocatable; +} + +template <class ELFT> +typename MipsGotSection<ELFT>::uintX_t MipsGotSection<ELFT>::getGp() const { + return ElfSym<ELFT>::MipsGp->template getVA<ELFT>(0); +} + +template <class ELFT> +static void writeUint(uint8_t *Buf, typename ELFT::uint Val) { + typedef typename ELFT::uint uintX_t; + write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Buf, Val); +} + +template <class ELFT> void MipsGotSection<ELFT>::writeTo(uint8_t *Buf) { + // Set the MSB of the second GOT slot. This is not required by any + // MIPS ABI documentation, though. + // + // There is a comment in glibc saying that "The MSB of got[1] of a + // gnu object is set to identify gnu objects," and in GNU gold it + // says "the second entry will be used by some runtime loaders". + // But how this field is being used is unclear. + // + // We are not really willing to mimic other linkers behaviors + // without understanding why they do that, but because all files + // generated by GNU tools have this special GOT value, and because + // we've been doing this for years, it is probably a safe bet to + // keep doing this for now. We really need to revisit this to see + // if we had to do this. + auto *P = reinterpret_cast<typename ELFT::Off *>(Buf); + P[1] = uintX_t(1) << (ELFT::Is64Bits ? 63 : 31); + Buf += HeaderEntriesNum * sizeof(uintX_t); + // Write 'page address' entries to the local part of the GOT. 
+ for (std::pair<const OutputSectionBase *, size_t> &L : PageIndexMap) { + size_t PageCount = getMipsPageCount(L.first->Size); + uintX_t FirstPageAddr = getMipsPageAddr(L.first->Addr); + for (size_t PI = 0; PI < PageCount; ++PI) { + uint8_t *Entry = Buf + (L.second + PI) * sizeof(uintX_t); + writeUint<ELFT>(Entry, FirstPageAddr + PI * 0x10000); + } + } + Buf += PageEntriesNum * sizeof(uintX_t); + auto AddEntry = [&](const GotEntry &SA) { + uint8_t *Entry = Buf; + Buf += sizeof(uintX_t); + const SymbolBody *Body = SA.first; + uintX_t VA = Body->template getVA<ELFT>(SA.second); + writeUint<ELFT>(Entry, VA); + }; + std::for_each(std::begin(LocalEntries), std::end(LocalEntries), AddEntry); + std::for_each(std::begin(LocalEntries32), std::end(LocalEntries32), AddEntry); + std::for_each(std::begin(GlobalEntries), std::end(GlobalEntries), AddEntry); + // Initialize TLS-related GOT entries. If the entry has a corresponding + // dynamic relocations, leave it initialized by zero. Write down adjusted + // TLS symbol's values otherwise. To calculate the adjustments use offsets + // for thread-local storage. 
+ // https://www.linux-mips.org/wiki/NPTL + if (TlsIndexOff != -1U && !Config->Pic) + writeUint<ELFT>(Buf + TlsIndexOff, 1); + for (const SymbolBody *B : TlsEntries) { + if (!B || B->isPreemptible()) + continue; + uintX_t VA = B->getVA<ELFT>(); + if (B->GotIndex != -1U) { + uint8_t *Entry = Buf + B->GotIndex * sizeof(uintX_t); + writeUint<ELFT>(Entry, VA - 0x7000); + } + if (B->GlobalDynIndex != -1U) { + uint8_t *Entry = Buf + B->GlobalDynIndex * sizeof(uintX_t); + writeUint<ELFT>(Entry, 1); + Entry += sizeof(uintX_t); + writeUint<ELFT>(Entry, VA - 0x8000); + } + } +} + +template <class ELFT> +GotPltSection<ELFT>::GotPltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotPltEntrySize, ".got.plt") {} + +template <class ELFT> void GotPltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size(); + Entries.push_back(&Sym); +} + +template <class ELFT> size_t GotPltSection<ELFT>::getSize() const { + return (Target->GotPltHeaderEntriesNum + Entries.size()) * + Target->GotPltEntrySize; +} + +template <class ELFT> void GotPltSection<ELFT>::writeTo(uint8_t *Buf) { + Target->writeGotPltHeader(Buf); + Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize; + for (const SymbolBody *B : Entries) { + Target->writeGotPlt(Buf, *B); + Buf += sizeof(uintX_t); + } +} + +// On ARM the IgotPltSection is part of the GotSection, on other Targets it is +// part of the .got.plt +template <class ELFT> +IgotPltSection<ELFT>::IgotPltSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotPltEntrySize, + Config->EMachine == EM_ARM ? 
".got" : ".got.plt") { +} + +template <class ELFT> void IgotPltSection<ELFT>::addEntry(SymbolBody &Sym) { + Sym.IsInIgot = true; + Sym.GotPltIndex = Entries.size(); + Entries.push_back(&Sym); +} + +template <class ELFT> size_t IgotPltSection<ELFT>::getSize() const { + return Entries.size() * Target->GotPltEntrySize; +} + +template <class ELFT> void IgotPltSection<ELFT>::writeTo(uint8_t *Buf) { + for (const SymbolBody *B : Entries) { + Target->writeIgotPlt(Buf, *B); + Buf += sizeof(uintX_t); + } +} + +template <class ELFT> +StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic) + : SyntheticSection<ELFT>(Dynamic ? (uintX_t)SHF_ALLOC : 0, SHT_STRTAB, 1, + Name), + Dynamic(Dynamic) {} + +// Adds a string to the string table. If HashIt is true we hash and check for +// duplicates. It is optional because the name of global symbols are already +// uniqued and hashing them again has a big cost for a small value: uniquing +// them with some other string that happens to be the same. +template <class ELFT> +unsigned StringTableSection<ELFT>::addString(StringRef S, bool HashIt) { + if (HashIt) { + auto R = StringMap.insert(std::make_pair(S, this->Size)); + if (!R.second) + return R.first->second; + } + unsigned Ret = this->Size; + this->Size = this->Size + S.size() + 1; + Strings.push_back(S); + return Ret; +} + +template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) { + // ELF string tables start with NUL byte, so advance the pointer by one. + ++Buf; + for (StringRef S : Strings) { + memcpy(Buf, S.data(), S.size()); + Buf += S.size() + 1; + } +} + +// Returns the number of version definition entries. Because the first entry +// is for the version definition itself, it is the number of versioned symbols +// plus one. Note that we don't support multiple versions yet. 
+static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; } + +template <class ELFT> +DynamicSection<ELFT>::DynamicSection() + : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, + sizeof(uintX_t), ".dynamic") { + this->Entsize = ELFT::Is64Bits ? 16 : 8; + // .dynamic section is not writable on MIPS. + // See "Special Section" in Chapter 4 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (Config->EMachine == EM_MIPS) + this->Flags = SHF_ALLOC; + + addEntries(); +} + +// There are some dynamic entries that don't depend on other sections. +// Such entries can be set early. +template <class ELFT> void DynamicSection<ELFT>::addEntries() { + // Add strings to .dynstr early so that .dynstr's size will be + // fixed early. + for (StringRef S : Config->AuxiliaryList) + add({DT_AUXILIARY, In<ELFT>::DynStrTab->addString(S)}); + if (!Config->RPath.empty()) + add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH, + In<ELFT>::DynStrTab->addString(Config->RPath)}); + for (SharedFile<ELFT> *F : Symtab<ELFT>::X->getSharedFiles()) + if (F->isNeeded()) + add({DT_NEEDED, In<ELFT>::DynStrTab->addString(F->getSoName())}); + if (!Config->SoName.empty()) + add({DT_SONAME, In<ELFT>::DynStrTab->addString(Config->SoName)}); + + // Set DT_FLAGS and DT_FLAGS_1. + uint32_t DtFlags = 0; + uint32_t DtFlags1 = 0; + if (Config->Bsymbolic) + DtFlags |= DF_SYMBOLIC; + if (Config->ZNodelete) + DtFlags1 |= DF_1_NODELETE; + if (Config->ZNow) { + DtFlags |= DF_BIND_NOW; + DtFlags1 |= DF_1_NOW; + } + if (Config->ZOrigin) { + DtFlags |= DF_ORIGIN; + DtFlags1 |= DF_1_ORIGIN; + } + + if (DtFlags) + add({DT_FLAGS, DtFlags}); + if (DtFlags1) + add({DT_FLAGS_1, DtFlags1}); + + if (!Config->Shared && !Config->Relocatable) + add({DT_DEBUG, (uint64_t)0}); +} + +// Add remaining entries to complete .dynamic contents. +template <class ELFT> void DynamicSection<ELFT>::finalize() { + if (this->Size) + return; // Already finalized. 
+ + this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + if (In<ELFT>::RelaDyn->OutSec->Size > 0) { + bool IsRela = Config->Rela; + add({IsRela ? DT_RELA : DT_REL, In<ELFT>::RelaDyn}); + add({IsRela ? DT_RELASZ : DT_RELSZ, In<ELFT>::RelaDyn->OutSec->Size}); + add({IsRela ? DT_RELAENT : DT_RELENT, + uintX_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))}); + + // MIPS dynamic loader does not support RELCOUNT tag. + // The problem is in the tight relation between dynamic + // relocations and GOT. So do not emit this tag on MIPS. + if (Config->EMachine != EM_MIPS) { + size_t NumRelativeRels = In<ELFT>::RelaDyn->getRelativeRelocCount(); + if (Config->ZCombreloc && NumRelativeRels) + add({IsRela ? DT_RELACOUNT : DT_RELCOUNT, NumRelativeRels}); + } + } + if (In<ELFT>::RelaPlt->OutSec->Size > 0) { + add({DT_JMPREL, In<ELFT>::RelaPlt}); + add({DT_PLTRELSZ, In<ELFT>::RelaPlt->OutSec->Size}); + add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT, + In<ELFT>::GotPlt}); + add({DT_PLTREL, uint64_t(Config->Rela ? 
DT_RELA : DT_REL)}); + } + + add({DT_SYMTAB, In<ELFT>::DynSymTab}); + add({DT_SYMENT, sizeof(Elf_Sym)}); + add({DT_STRTAB, In<ELFT>::DynStrTab}); + add({DT_STRSZ, In<ELFT>::DynStrTab->getSize()}); + if (In<ELFT>::GnuHashTab) + add({DT_GNU_HASH, In<ELFT>::GnuHashTab}); + if (In<ELFT>::HashTab) + add({DT_HASH, In<ELFT>::HashTab}); + + if (Out<ELFT>::PreinitArray) { + add({DT_PREINIT_ARRAY, Out<ELFT>::PreinitArray}); + add({DT_PREINIT_ARRAYSZ, Out<ELFT>::PreinitArray, Entry::SecSize}); + } + if (Out<ELFT>::InitArray) { + add({DT_INIT_ARRAY, Out<ELFT>::InitArray}); + add({DT_INIT_ARRAYSZ, Out<ELFT>::InitArray, Entry::SecSize}); + } + if (Out<ELFT>::FiniArray) { + add({DT_FINI_ARRAY, Out<ELFT>::FiniArray}); + add({DT_FINI_ARRAYSZ, Out<ELFT>::FiniArray, Entry::SecSize}); + } + + if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Init)) + add({DT_INIT, B}); + if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Fini)) + add({DT_FINI, B}); + + bool HasVerNeed = In<ELFT>::VerNeed->getNeedNum() != 0; + if (HasVerNeed || In<ELFT>::VerDef) + add({DT_VERSYM, In<ELFT>::VerSym}); + if (In<ELFT>::VerDef) { + add({DT_VERDEF, In<ELFT>::VerDef}); + add({DT_VERDEFNUM, getVerDefNum()}); + } + if (HasVerNeed) { + add({DT_VERNEED, In<ELFT>::VerNeed}); + add({DT_VERNEEDNUM, In<ELFT>::VerNeed->getNeedNum()}); + } + + if (Config->EMachine == EM_MIPS) { + add({DT_MIPS_RLD_VERSION, 1}); + add({DT_MIPS_FLAGS, RHF_NOTPOT}); + add({DT_MIPS_BASE_ADDRESS, Config->ImageBase}); + add({DT_MIPS_SYMTABNO, In<ELFT>::DynSymTab->getNumSymbols()}); + add({DT_MIPS_LOCAL_GOTNO, In<ELFT>::MipsGot->getLocalEntriesNum()}); + if (const SymbolBody *B = In<ELFT>::MipsGot->getFirstGlobalEntry()) + add({DT_MIPS_GOTSYM, B->DynsymIndex}); + else + add({DT_MIPS_GOTSYM, In<ELFT>::DynSymTab->getNumSymbols()}); + add({DT_PLTGOT, In<ELFT>::MipsGot}); + if (In<ELFT>::MipsRldMap) + add({DT_MIPS_RLD_MAP, In<ELFT>::MipsRldMap}); + } + + this->OutSec->Entsize = this->Entsize; + this->OutSec->Link = this->Link; + + // +1 for DT_NULL 
+ this->Size = (Entries.size() + 1) * this->Entsize; +} + +template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { + auto *P = reinterpret_cast<Elf_Dyn *>(Buf); + + for (const Entry &E : Entries) { + P->d_tag = E.Tag; + switch (E.Kind) { + case Entry::SecAddr: + P->d_un.d_ptr = E.OutSec->Addr; + break; + case Entry::InSecAddr: + P->d_un.d_ptr = E.InSec->OutSec->Addr + E.InSec->OutSecOff; + break; + case Entry::SecSize: + P->d_un.d_val = E.OutSec->Size; + break; + case Entry::SymAddr: + P->d_un.d_ptr = E.Sym->template getVA<ELFT>(); + break; + case Entry::PlainInt: + P->d_un.d_val = E.Val; + break; + } + ++P; + } +} + +template <class ELFT> +typename ELFT::uint DynamicReloc<ELFT>::getOffset() const { + if (OutputSec) + return OutputSec->Addr + OffsetInSec; + return InputSec->OutSec->Addr + InputSec->getOffset(OffsetInSec); +} + +template <class ELFT> +typename ELFT::uint DynamicReloc<ELFT>::getAddend() const { + if (UseSymVA) + return Sym->getVA<ELFT>(Addend); + return Addend; +} + +template <class ELFT> uint32_t DynamicReloc<ELFT>::getSymIndex() const { + if (Sym && !UseSymVA) + return Sym->DynsymIndex; + return 0; +} + +template <class ELFT> +RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort) + : SyntheticSection<ELFT>(SHF_ALLOC, Config->Rela ? SHT_RELA : SHT_REL, + sizeof(uintX_t), Name), + Sort(Sort) { + this->Entsize = Config->Rela ? 
sizeof(Elf_Rela) : sizeof(Elf_Rel); +} + +template <class ELFT> +void RelocationSection<ELFT>::addReloc(const DynamicReloc<ELFT> &Reloc) { + if (Reloc.Type == Target->RelativeRel) + ++NumRelativeRelocs; + Relocs.push_back(Reloc); +} + +template <class ELFT, class RelTy> +static bool compRelocations(const RelTy &A, const RelTy &B) { + bool AIsRel = A.getType(Config->Mips64EL) == Target->RelativeRel; + bool BIsRel = B.getType(Config->Mips64EL) == Target->RelativeRel; + if (AIsRel != BIsRel) + return AIsRel; + + return A.getSymbol(Config->Mips64EL) < B.getSymbol(Config->Mips64EL); +} + +template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) { + uint8_t *BufBegin = Buf; + for (const DynamicReloc<ELFT> &Rel : Relocs) { + auto *P = reinterpret_cast<Elf_Rela *>(Buf); + Buf += Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); + + if (Config->Rela) + P->r_addend = Rel.getAddend(); + P->r_offset = Rel.getOffset(); + if (Config->EMachine == EM_MIPS && Rel.getInputSec() == In<ELFT>::MipsGot) + // Dynamic relocation against MIPS GOT section make deal TLS entries + // allocated in the end of the GOT. We need to adjust the offset to take + // in account 'local' and 'global' GOT entries. + P->r_offset += In<ELFT>::MipsGot->getTlsOffset(); + P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->Mips64EL); + } + + if (Sort) { + if (Config->Rela) + std::stable_sort((Elf_Rela *)BufBegin, + (Elf_Rela *)BufBegin + Relocs.size(), + compRelocations<ELFT, Elf_Rela>); + else + std::stable_sort((Elf_Rel *)BufBegin, (Elf_Rel *)BufBegin + Relocs.size(), + compRelocations<ELFT, Elf_Rel>); + } +} + +template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() { + return this->Entsize * Relocs.size(); +} + +template <class ELFT> void RelocationSection<ELFT>::finalize() { + this->Link = In<ELFT>::DynSymTab ? In<ELFT>::DynSymTab->OutSec->SectionIndex + : In<ELFT>::SymTab->OutSec->SectionIndex; + + // Set required output section properties. 
+  this->OutSec->Link = this->Link;
+  this->OutSec->Entsize = this->Entsize;
+}
+
+// Constructs the symbol table section paired with StrTabSec. When StrTabSec
+// is the dynamic string table this is an SHF_ALLOC ".dynsym" section of type
+// SHT_DYNSYM; otherwise it is a ".symtab" of type SHT_SYMTAB with no flags.
+template <class ELFT>
+SymbolTableSection<ELFT>::SymbolTableSection(
+    StringTableSection<ELFT> &StrTabSec)
+    : SyntheticSection<ELFT>(StrTabSec.isDynamic() ? (uintX_t)SHF_ALLOC : 0,
+                             StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB,
+                             sizeof(uintX_t),
+                             StrTabSec.isDynamic() ? ".dynsym" : ".symtab"),
+      StrTabSec(StrTabSec) {
+  this->Entsize = sizeof(Elf_Sym);
+}
+
+// Orders symbols according to their positions in the GOT,
+// in compliance with MIPS ABI rules.
+// See "Global Offset Table" in Chapter 5 in the following document
+// for detailed description:
+// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+static bool sortMipsSymbols(const SymbolBody *L, const SymbolBody *R) {
+  // Sort entries related to non-local preemptible symbols by GOT indexes.
+  // All other entries go to the first part of GOT in arbitrary order.
+  bool LIsInLocalGot = !L->IsInGlobalMipsGot;
+  bool RIsInLocalGot = !R->IsInGlobalMipsGot;
+  // If either side is outside the global GOT part, L sorts first only when
+  // R is in the global part.
+  if (LIsInLocalGot || RIsInLocalGot)
+    return !RIsInLocalGot;
+  return L->GotIndex < R->GotIndex;
+}
+
+// Returns the ELF binding to emit for Body. Relocatable output keeps the
+// symbol's own binding. Otherwise a symbol whose visibility is neither
+// STV_DEFAULT nor STV_PROTECTED becomes STB_LOCAL, and STB_GNU_UNIQUE is
+// replaced by STB_GLOBAL when Config->NoGnuUnique is set.
+static uint8_t getSymbolBinding(SymbolBody *Body) {
+  Symbol *S = Body->symbol();
+  if (Config->Relocatable)
+    return S->Binding;
+  uint8_t Visibility = S->Visibility;
+  if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
+    return STB_LOCAL;
+  if (Config->NoGnuUnique && S->Binding == STB_GNU_UNIQUE)
+    return STB_GLOBAL;
+  return S->Binding;
+}
+
+// Sets Link/Info/Entsize on this section and its output section, then puts
+// Symbols into their final order and (except for a non-relocatable .symtab)
+// assigns each symbol its DynsymIndex.
+template <class ELFT> void SymbolTableSection<ELFT>::finalize() {
+  this->OutSec->Link = this->Link = StrTabSec.OutSec->SectionIndex;
+  this->OutSec->Info = this->Info = NumLocals + 1;
+  this->OutSec->Entsize = this->Entsize;
+
+  // Relocatable output: keep the current order and number the entries
+  // starting right after the NumLocals local symbols.
+  if (Config->Relocatable) {
+    size_t I = NumLocals;
+    for (const SymbolTableEntry &S : Symbols)
+      S.Symbol->DynsymIndex = ++I;
+    return;
+  }
+
+  // .symtab: only move entries that will be emitted as STB_LOCAL in front of
+  // the others. No DynsymIndex is assigned on this path.
+  if (!StrTabSec.isDynamic()) {
+    std::stable_sort(Symbols.begin(), Symbols.end(),
+                     [](const SymbolTableEntry &L, const SymbolTableEntry &R) {
+                       return getSymbolBinding(L.Symbol) == STB_LOCAL &&
+                              getSymbolBinding(R.Symbol) != STB_LOCAL;
+                     });
+    return;
+  }
+  if (In<ELFT>::GnuHashTab)
+    // NB: It also sorts Symbols to meet the GNU hash table requirements.
+    In<ELFT>::GnuHashTab->addSymbols(Symbols);
+  else if (Config->EMachine == EM_MIPS)
+    std::stable_sort(Symbols.begin(), Symbols.end(),
+                     [](const SymbolTableEntry &L, const SymbolTableEntry &R) {
+                       return sortMipsSymbols(L.Symbol, R.Symbol);
+                     });
+  // .dynsym indexes are 1-based in the final order.
+  size_t I = 0;
+  for (const SymbolTableEntry &S : Symbols)
+    S.Symbol->DynsymIndex = ++I;
+}
+
+// Appends B to the table together with the string table offset that
+// StrTabSec.addString returns for its name.
+template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) {
+  Symbols.push_back({B, StrTabSec.addString(B->getName(), false)});
+}
+
+// Writes the symbol table to Buf: local symbols first (only for .symtab and
+// only when locals are not all discarded), then the entries in Symbols.
+template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
+  // Skip the first Elf_Sym slot; nothing is written into it here.
+  Buf += sizeof(Elf_Sym);
+
+  // All symbols with STB_LOCAL binding precede the weak and global symbols.
+  // .dynsym only contains global symbols.
+  if (Config->Discard != DiscardPolicy::All && !StrTabSec.isDynamic())
+    writeLocalSymbols(Buf);
+
+  writeGlobalSymbols(Buf);
+}
+
+// Buf is passed by reference and is advanced past every entry written, so
+// the caller can continue writing right after the local symbols.
+template <class ELFT>
+void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) {
+  // Iterate over all input object files to copy their local symbols
+  // to the output symbol table pointed to by Buf.
+  for (ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) {
+    for (const std::pair<const DefinedRegular<ELFT> *, size_t> &P :
+         File->KeptLocalSyms) {
+      const DefinedRegular<ELFT> &Body = *P.first;
+      InputSectionBase<ELFT> *Section = Body.Section;
+      auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
+
+      if (!Section) {
+        // No input section: emit as an absolute symbol with its raw value.
+        ESym->st_shndx = SHN_ABS;
+        ESym->st_value = Body.Value;
+      } else {
+        const OutputSectionBase *OutSec = Section->OutSec;
+        ESym->st_shndx = OutSec->SectionIndex;
+        ESym->st_value = OutSec->Addr + Section->getOffset(Body);
+      }
+      // P.second is used as the st_name value for this local symbol.
+      ESym->st_name = P.second;
+      ESym->st_size = Body.template getSize<ELFT>();
+      ESym->setBindingAndType(STB_LOCAL, Body.Type);
+      Buf += sizeof(*ESym);
+    }
+  }
+}
+
+template <class ELFT>
+void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) {
+  // Write the internal symbol table contents to the output symbol table
+  // pointed to by Buf.
+  auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
+  for (const SymbolTableEntry &S : Symbols) {
+    SymbolBody *Body = S.Symbol;
+    size_t StrOff = S.StrTabOffset;
+
+    uint8_t Type = Body->Type;
+    uintX_t Size = Body->getSize<ELFT>();
+
+    ESym->setBindingAndType(getSymbolBinding(Body), Type);
+    ESym->st_size = Size;
+    ESym->st_name = StrOff;
+    ESym->setVisibility(Body->symbol()->Visibility);
+    ESym->st_value = Body->getVA<ELFT>();
+
+    // st_shndx is only assigned when the symbol has an output section or is
+    // a section-less DefinedRegular (absolute); otherwise it is left as is.
+    if (const OutputSectionBase *OutSec = getOutputSection(Body))
+      ESym->st_shndx = OutSec->SectionIndex;
+    else if (isa<DefinedRegular<ELFT>>(Body))
+      ESym->st_shndx = SHN_ABS;
+
+    if (Config->EMachine == EM_MIPS) {
+      // On MIPS we need to mark symbol which has a PLT entry and requires
+      // pointer equality by STO_MIPS_PLT flag. That is necessary to help
+      // dynamic linker distinguish such symbols and MIPS lazy-binding stubs.
+      // https://sourceware.org/ml/binutils/2008-07/txt00000.txt
+      if (Body->isInPlt() && Body->NeedsCopyOrPltAddr)
+        ESym->st_other |= STO_MIPS_PLT;
+      // In relocatable output, PIC regular symbols also get STO_MIPS_PIC.
+      if (Config->Relocatable) {
+        auto *D = dyn_cast<DefinedRegular<ELFT>>(Body);
+        if (D && D->isMipsPIC())
+          ESym->st_other |= STO_MIPS_PIC;
+      }
+    }
+    ++ESym;
+  }
+}
+
+// Returns the output section Sym is placed in, or nullptr when it has none:
+// undefined and lazy symbols, regular symbols without an input section, and
+// shared symbols that do not need a copy.
+template <class ELFT>
+const OutputSectionBase *
+SymbolTableSection<ELFT>::getOutputSection(SymbolBody *Sym) {
+  switch (Sym->kind()) {
+  case SymbolBody::DefinedSyntheticKind:
+    return cast<DefinedSynthetic>(Sym)->Section;
+  case SymbolBody::DefinedRegularKind: {
+    auto &D = cast<DefinedRegular<ELFT>>(*Sym);
+    if (D.Section)
+      return D.Section->OutSec;
+    break;
+  }
+  case SymbolBody::DefinedCommonKind:
+    return In<ELFT>::Common->OutSec;
+  case SymbolBody::SharedKind:
+    if (cast<SharedSymbol<ELFT>>(Sym)->needsCopy())
+      return Out<ELFT>::Bss;
+    break;
+  case SymbolBody::UndefinedKind:
+  case SymbolBody::LazyArchiveKind:
+  case SymbolBody::LazyObjectKind:
+    break;
+  }
+  return nullptr;
+}
+
+// Creates the ".gnu.hash" section. Entsize is 4 on 32-bit targets and 0 on
+// 64-bit targets.
+template <class ELFT>
+GnuHashTableSection<ELFT>::GnuHashTableSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_HASH, sizeof(uintX_t),
+                             ".gnu.hash") {
+  this->Entsize = ELFT::Is64Bits ? 0 : 4;
+}
+
+// Returns the number of hash buckets to use for NumHashed symbols: 0 for an
+// empty table, otherwise Primes[ceil(log2(NumHashed))], with the index
+// clamped to the last element of the table.
+template <class ELFT>
+unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) {
+  if (!NumHashed)
+    return 0;
+
+  // These values are prime numbers which are not greater than 2^(N-1) + 1.
+  // As a result, for any particular NumHashed we return a prime number
+  // which is not greater than NumHashed.
+  static const unsigned Primes[] = {
+      1,   1,    3,    3,    7,    13,    31,    61,    127,   251,
+      509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071};
+
+  return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed),
+                                   array_lengthof(Primes) - 1)];
+}
+
+// Bloom filter estimation: at least 8 bits for each hashed symbol.
+// GNU Hash table requirement: it should be a power of 2,
+//   the minimum value is 1, even for an empty table.
+// Expected results for a 32-bit target:
+//   calcMaskWords(0..4)   = 1
+//   calcMaskWords(5..8)   = 2
+//   calcMaskWords(9..16)  = 4
+// For a 64-bit target:
+//   calcMaskWords(0..8)   = 1
+//   calcMaskWords(9..16)  = 2
+//   calcMaskWords(17..32) = 4
+// Returns the number of Bloom filter words (each sizeof(Elf_Off) bytes) to
+// use for NumHashed symbols.
+template <class ELFT>
+unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) {
+  if (!NumHashed)
+    return 1;
+  return NextPowerOf2((NumHashed - 1) / sizeof(Elf_Off));
+}
+
+// Computes the table parameters (NBuckets, MaskWords, Shift2) from the number
+// of hashed symbols and derives the section size from them.
+template <class ELFT> void GnuHashTableSection<ELFT>::finalize() {
+  unsigned NumHashed = Symbols.size();
+  NBuckets = calcNBuckets(NumHashed);
+  MaskWords = calcMaskWords(NumHashed);
+  // Second hash shift estimation: just predefined values.
+  Shift2 = ELFT::Is64Bits ? 6 : 5;
+
+  this->OutSec->Entsize = this->Entsize;
+  this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
+  this->Size = sizeof(Elf_Word) * 4            // Header
+               + sizeof(Elf_Off) * MaskWords   // Bloom Filter
+               + sizeof(Elf_Word) * NBuckets   // Hash Buckets
+               + sizeof(Elf_Word) * NumHashed; // Hash Values
+}
+
+// Writes the header and, if any symbols are hashed, the Bloom filter followed
+// by the bucket and hash value arrays. writeHeader and writeBloomFilter take
+// Buf by reference and advance it past what they wrote.
+template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
+  writeHeader(Buf);
+  if (Symbols.empty())
+    return;
+  writeBloomFilter(Buf);
+  writeHashTable(Buf);
+}
+
+// Writes the four-word header: bucket count, the number of dynamic symbols
+// that are not hashed, Bloom filter word count and the second hash shift.
+template <class ELFT>
+void GnuHashTableSection<ELFT>::writeHeader(uint8_t *&Buf) {
+  auto *P = reinterpret_cast<Elf_Word *>(Buf);
+  *P++ = NBuckets;
+  // NOTE(review): presumably this is the .dynsym index of the first hashed
+  // symbol -- confirm against what getNumSymbols() counts.
+  *P++ = In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size();
+  *P++ = MaskWords;
+  *P++ = Shift2;
+  Buf = reinterpret_cast<uint8_t *>(P);
+}
+
+// Sets two bits per hashed symbol in the Bloom filter word selected by its
+// hash: bit (Hash % C) and bit ((Hash >> Shift2) % C), where C is the number
+// of bits in a filter word. Bits are only OR-ed in, so this assumes the
+// filter area starts out zeroed -- TODO confirm.
+template <class ELFT>
+void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *&Buf) {
+  unsigned C = sizeof(Elf_Off) * 8;
+
+  auto *Masks = reinterpret_cast<Elf_Off *>(Buf);
+  for (const SymbolData &Sym : Symbols) {
+    // MaskWords is a power of two, so the mask selects a word index.
+    size_t Pos = (Sym.Hash / C) & (MaskWords - 1);
+    uintX_t V = (uintX_t(1) << (Sym.Hash % C)) |
+                (uintX_t(1) << ((Sym.Hash >> Shift2) % C));
+    Masks[Pos] |= V;
+  }
+  Buf += sizeof(Elf_Off) * MaskWords;
+}
+
+// Fills the bucket array and the hash value array that follows it. Symbols
+// must already be ordered by bucket (asserted below). Each bucket stores the
+// DynsymIndex of its first symbol; a hash value with its low bit set marks
+// the last symbol of a bucket.
+template <class ELFT>
+void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) {
+  Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
+  Elf_Word *Values = Buckets + NBuckets;
+
+  int PrevBucket = -1;
+  int I = 0;
+  for (const SymbolData &Sym : Symbols) {
+    int Bucket = Sym.Hash % NBuckets;
+    assert(PrevBucket <= Bucket);
+    if (Bucket != PrevBucket) {
+      Buckets[Bucket] = Sym.Body->DynsymIndex;
+      PrevBucket = Bucket;
+      // A new bucket starts here, so terminate the previous chain.
+      if (I > 0)
+        Values[I - 1] |= 1;
+    }
+    Values[I] = Sym.Hash & ~1;
+    ++I;
+  }
+  // Terminate the chain of the last bucket.
+  if (I > 0)
+    Values[I - 1] |= 1;
+}
+
+// String hash used for the GNU hash table: H = H * 33 + C, seeded with 5381.
+static uint32_t hashGnu(StringRef Name) {
+  uint32_t H = 5381;
+  for (uint8_t C : Name)
+    H = (H << 5) + H + C;
+  return H;
+}
+
+// Add symbols to this symbol hash table. Note that this function
+// destructively sorts the given vector -- which is needed because
+// the GNU-style hash table imposes some sorting requirements.
+template <class ELFT>
+void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) {
+  // Ideally this will just be 'auto' but GCC 6.1 is not able
+  // to deduce it correctly.
+  // Undefined symbols are moved to the front of V and are not hashed.
+  std::vector<SymbolTableEntry>::iterator Mid =
+      std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) {
+        return S.Symbol->isUndefined();
+      });
+  // Every symbol is undefined: nothing to hash.
+  if (Mid == V.end())
+    return;
+  for (auto I = Mid, E = V.end(); I != E; ++I) {
+    SymbolBody *B = I->Symbol;
+    size_t StrOff = I->StrTabOffset;
+    Symbols.push_back({B, StrOff, hashGnu(B->getName())});
+  }
+
+  // Order the hashed symbols by the bucket they fall into. This local
+  // NBuckets is computed the same way finalize() computes the member.
+  unsigned NBuckets = calcNBuckets(Symbols.size());
+  std::stable_sort(Symbols.begin(), Symbols.end(),
+                   [&](const SymbolData &L, const SymbolData &R) {
+                     return L.Hash % NBuckets < R.Hash % NBuckets;
+                   });
+
+  // Replace the tail of V with the bucket-sorted symbols.
+  V.erase(Mid, V.end());
+  for (const SymbolData &Sym : Symbols)
+    V.push_back({Sym.Body, Sym.STName});
+}
+
+// Creates the SysV-style ".hash" section; every entry is an Elf_Word.
+template <class ELFT>
+HashTableSection<ELFT>::HashTableSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_HASH, sizeof(Elf_Word), ".hash") {
+  this->Entsize = sizeof(Elf_Word);
+}
+
+// Links the section to .dynsym and computes its size: two header words plus
+// one chain entry and one bucket per dynamic symbol.
+template <class ELFT> void HashTableSection<ELFT>::finalize() {
+  this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
+  this->OutSec->Entsize = this->Entsize;
+
+  unsigned NumEntries = 2;                            // nbucket and nchain.
+  NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); // The chain entries.
+
+  // Create as many buckets as there are symbols.
+  // FIXME: This is simplistic. We can try to optimize it, but implementing
+  // support for SHT_GNU_HASH is probably even more profitable.
+  NumEntries += In<ELFT>::DynSymTab->getNumSymbols();
+  this->Size = NumEntries * sizeof(Elf_Word);
+}
+
+// Writes nbucket, nchain, then the bucket and chain arrays. Each symbol is
+// pushed onto the front of its bucket's chain. Buckets and chain ends are
+// read before being written, so this assumes Buf starts out zeroed -- TODO
+// confirm.
+template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) {
+  unsigned NumSymbols = In<ELFT>::DynSymTab->getNumSymbols();
+  auto *P = reinterpret_cast<Elf_Word *>(Buf);
+  *P++ = NumSymbols; // nbucket
+  *P++ = NumSymbols; // nchain
+
+  Elf_Word *Buckets = P;
+  Elf_Word *Chains = P + NumSymbols;
+
+  for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) {
+    SymbolBody *Body = S.Symbol;
+    StringRef Name = Body->getName();
+    unsigned I = Body->DynsymIndex;
+    uint32_t Hash = hashSysV(Name) % NumSymbols;
+    Chains[I] = Buckets[Hash];
+    Buckets[Hash] = I;
+  }
+}
+
+// Creates the executable ".plt" section with 16-byte alignment.
+template <class ELFT>
+PltSection<ELFT>::PltSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16,
+                             ".plt") {}
+
+// Writes the PLT header followed by one target-specific PLT entry per
+// element of Entries.
+template <class ELFT> void PltSection<ELFT>::writeTo(uint8_t *Buf) {
+  // At beginning of PLT, we have code to call the dynamic linker
+  // to resolve dynsyms at runtime. Write such code.
+  Target->writePltHeader(Buf);
+  size_t Off = Target->PltHeaderSize;
+
+  for (auto &I : Entries) {
+    const SymbolBody *B = I.first;
+    unsigned RelOff = I.second;
+    uint64_t Got = B->getGotPltVA<ELFT>();
+    uint64_t Plt = this->getVA() + Off;
+    Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff);
+    Off += Target->PltEntrySize;
+  }
+}
+
+// Gives Sym the next PLT index and records it together with the relocation
+// offset RelaPlt reports at this point.
+template <class ELFT> void PltSection<ELFT>::addEntry(SymbolBody &Sym) {
+  Sym.PltIndex = Entries.size();
+  unsigned RelOff = In<ELFT>::RelaPlt->getRelocOffset();
+  Entries.push_back(std::make_pair(&Sym, RelOff));
+}
+
+// Size of the header plus all entries.
+template <class ELFT> size_t PltSection<ELFT>::getSize() const {
+  return Target->PltHeaderSize + Entries.size() * Target->PltEntrySize;
+}
+
+// Creates the PLT used for IRELATIVE entries. It is also named ".plt" and
+// has the same flags and alignment as PltSection.
+template <class ELFT>
+IpltSection<ELFT>::IpltSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16,
+                             ".plt") {}
+
+template <class ELFT> void IpltSection<ELFT>::writeTo(uint8_t *Buf) {
+  // The IRelative relocations do not support lazy binding so no header is
+  // needed.
+  size_t Off = 0;
+  for (auto &I : Entries) {
+    const SymbolBody *B = I.first;
+    // The recorded relocation offset is biased by the size of the regular
+    // PLT.
+    unsigned RelOff = I.second + In<ELFT>::Plt->getSize();
+    uint64_t Got = B->getGotPltVA<ELFT>();
+    uint64_t Plt = this->getVA() + Off;
+    Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff);
+    Off += Target->PltEntrySize;
+  }
+}
+
+// Like PltSection::addEntry, but marks the symbol as living in the IPLT and
+// takes the relocation offset from RelaIplt.
+template <class ELFT> void IpltSection<ELFT>::addEntry(SymbolBody &Sym) {
+  Sym.PltIndex = Entries.size();
+  Sym.IsInIplt = true;
+  unsigned RelOff = In<ELFT>::RelaIplt->getRelocOffset();
+  Entries.push_back(std::make_pair(&Sym, RelOff));
+}
+
+// No header, so the size is just the entries.
+template <class ELFT> size_t IpltSection<ELFT>::getSize() const {
+  return Entries.size() * Target->PltEntrySize;
+}
+
+// Creates the non-allocated ".gdb_index" section.
+template <class ELFT>
+GdbIndexSection<ELFT>::GdbIndexSection()
+    : SyntheticSection<ELFT>(0, SHT_PROGBITS, 1, ".gdb_index"),
+      StringPool(llvm::StringTableBuilder::ELF) {}
+
+// Passes every ".debug_info" input section that has an output section to
+// readDwarf().
+template <class ELFT> void GdbIndexSection<ELFT>::parseDebugSections() {
+  for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections)
+    if (InputSection<ELFT> *IS = dyn_cast<InputSection<ELFT>>(S))
+      if (IS->OutSec && IS->Name == ".debug_info")
+        readDwarf(IS);
+}
+
+// Iterative hash function for symbol's name is described in .gdb_index format
+// specification. Note that we use one for version 5 to 7 here, it is different
+// for version 4.
+static uint32_t hash(StringRef Str) {
+  uint32_t R = 0;
+  for (uint8_t C : Str)
+    R = R * 67 + tolower(C) - 113;
+  return R;
+}
+
+// Collects the index data for one .debug_info section: its compilation
+// units, its address area entries, and its public names/types, which are
+// merged into the symbol table and the per-symbol CU vectors.
+template <class ELFT>
+void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) {
+  GdbIndexBuilder<ELFT> Builder(I);
+  // Do nothing further if an error has been reported.
+  if (ErrorCount)
+    return;
+
+  // Number of CUs collected so far, taken before this section's CUs are
+  // appended.
+  size_t CuId = CompilationUnits.size();
+  std::vector<std::pair<uintX_t, uintX_t>> CuList = Builder.readCUList();
+  CompilationUnits.insert(CompilationUnits.end(), CuList.begin(), CuList.end());
+
+  std::vector<AddressEntry<ELFT>> AddrArea = Builder.readAddressArea(CuId);
+  AddressArea.insert(AddressArea.end(), AddrArea.begin(), AddrArea.end());
+
+  std::vector<std::pair<StringRef, uint8_t>> NamesAndTypes =
+      Builder.readPubNamesAndTypes();
+
+  for (std::pair<StringRef, uint8_t> &Pair : NamesAndTypes) {
+    uint32_t Hash = hash(Pair.first);
+    size_t Offset = StringPool.add(Pair.first);
+
+    bool IsNew;
+    GdbSymbol *Sym;
+    std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset);
+    // A new symbol gets a fresh CU vector holding just this entry.
+    if (IsNew) {
+      Sym->CuVectorIndex = CuVectors.size();
+      CuVectors.push_back({{CuId, Pair.second}});
+      continue;
+    }
+
+    // A known symbol: append this entry to its existing CU vector.
+    std::vector<std::pair<uint32_t, uint8_t>> &CuVec =
+        CuVectors[Sym->CuVectorIndex];
+    CuVec.push_back({CuId, Pair.second});
+  }
+}
+
+// Parses the debug sections and computes the offsets of all areas of the
+// index. Runs its body only once; later calls return immediately.
+template <class ELFT> void GdbIndexSection<ELFT>::finalize() {
+  if (Finalized)
+    return;
+  Finalized = true;
+
+  parseDebugSections();
+
+  // The GdbIndex header consists of a version field
+  // and 5 more fields with different kinds of offsets.
+  CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize;
+  SymTabOffset = CuTypesOffset + AddressArea.size() * AddressEntrySize;
+
+  ConstantPoolOffset =
+      SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize;
+
+  // Each CU vector occupies one length word plus one word per element.
+  for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) {
+    CuVectorsOffset.push_back(CuVectorsSize);
+    CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1);
+  }
+  StringPoolOffset = ConstantPoolOffset + CuVectorsSize;
+
+  StringPool.finalizeInOrder();
+}
+
+// The size is only known after finalize(), so force it here. finalize() is
+// guarded by Finalized, which makes the repeated call harmless; the
+// const_cast is needed because finalize() is not const.
+template <class ELFT> size_t GdbIndexSection<ELFT>::getSize() const {
+  const_cast<GdbIndexSection<ELFT> *>(this)->finalize();
+  return StringPoolOffset + StringPool.getSize();
+}
+
+// Writes the index: the 24-byte header, the CU list, the address area, the
+// symbol table, the CU vectors and finally the string pool.
+template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) {
+  // NOTE(review): the types CU list and address area slots both receive
+  // CuTypesOffset. No types CU list is written below, so the two areas would
+  // start at the same place -- this looks intentional, confirm.
+  write32le(Buf, 7);                       // Write version.
+  write32le(Buf + 4, CuListOffset);        // CU list offset.
+  write32le(Buf + 8, CuTypesOffset);       // Types CU list offset.
+  write32le(Buf + 12, CuTypesOffset);      // Address area offset.
+  write32le(Buf + 16, SymTabOffset);       // Symbol table offset.
+  write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset.
+  Buf += 24;
+
+  // Write the CU list.
+  for (std::pair<uintX_t, uintX_t> CU : CompilationUnits) {
+    write64le(Buf, CU.first);
+    write64le(Buf + 8, CU.second);
+    Buf += 16;
+  }
+
+  // Write the address area.
+  for (AddressEntry<ELFT> &E : AddressArea) {
+    uintX_t BaseAddr = E.Section->OutSec->Addr + E.Section->getOffset(0);
+    write64le(Buf, BaseAddr + E.LowAddress);
+    write64le(Buf + 8, BaseAddr + E.HighAddress);
+    write32le(Buf + 16, E.CuIndex);
+    Buf += 20;
+  }
+
+  // Write the symbol table.
+  for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) {
+    GdbSymbol *Sym = SymbolTable.getSymbol(I);
+    // Unused slots are skipped without being written.
+    if (Sym) {
+      // The name offset is stored relative to the constant pool start.
+      size_t NameOffset =
+          Sym->NameOffset + StringPoolOffset - ConstantPoolOffset;
+      size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex];
+      write32le(Buf, NameOffset);
+      write32le(Buf + 4, CuVectorOffset);
+    }
+    Buf += 8;
+  }
+
+  // Write the CU vectors into the constant pool.
+  for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) {
+    write32le(Buf, CuVec.size());
+    Buf += 4;
+    for (std::pair<uint32_t, uint8_t> &P : CuVec) {
+      // The flags byte goes into the top 8 bits of the CU index word.
+      uint32_t Index = P.first;
+      uint8_t Flags = P.second;
+      Index |= Flags << 24;
+      write32le(Buf, Index);
+      Buf += 4;
+    }
+  }
+
+  StringPool.write(Buf);
+}
+
+// The section is considered empty when Out<ELFT>::DebugInfo is null.
+template <class ELFT> bool GdbIndexSection<ELFT>::empty() const {
+  return !Out<ELFT>::DebugInfo;
+}
+
+// Creates the ".eh_frame_hdr" section.
+template <class ELFT>
+EhFrameHeader<ELFT>::EhFrameHeader()
+    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {}
+
+// .eh_frame_hdr contains a binary search table of pointers to FDEs.
+// Each entry of the search table consists of two values,
+// the starting PC that the FDE covers, and the FDE's address.
+// It is sorted by PC.
+template <class ELFT> void EhFrameHeader<ELFT>::writeTo(uint8_t *Buf) {
+  const endianness E = ELFT::TargetEndianness;
+
+  // Sort the FDE list by their PC and uniqueify. Usually there is only
+  // one FDE for a PC (i.e. function), but if ICF merges two functions
+  // into one, there can be more than one FDE pointing to the address.
+  auto Less = [](const FdeData &A, const FdeData &B) { return A.Pc < B.Pc; };
+  std::stable_sort(Fdes.begin(), Fdes.end(), Less);
+  auto Eq = [](const FdeData &A, const FdeData &B) { return A.Pc == B.Pc; };
+  Fdes.erase(std::unique(Fdes.begin(), Fdes.end(), Eq), Fdes.end());
+
+  // 12-byte header: four single-byte fields (a 1 followed by three
+  // DW_EH_PE_* pointer encodings), then the .eh_frame address relative to
+  // the field that holds it (this section's VA + 4), then the FDE count.
+  Buf[0] = 1;
+  Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+  Buf[2] = DW_EH_PE_udata4;
+  Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
+  write32<E>(Buf + 4, Out<ELFT>::EhFrame->Addr - this->getVA() - 4);
+  write32<E>(Buf + 8, Fdes.size());
+  Buf += 12;
+
+  // Table entries are stored relative to the start of this section.
+  uintX_t VA = this->getVA();
+  for (FdeData &Fde : Fdes) {
+    write32<E>(Buf, Fde.Pc - VA);
+    write32<E>(Buf + 4, Fde.FdeVA - VA);
+    Buf += 8;
+  }
+}
+
+template <class ELFT> size_t EhFrameHeader<ELFT>::getSize() const {
+  // .eh_frame_hdr has a 12-byte header followed by an array of FDEs.
+  return 12 + Out<ELFT>::EhFrame->NumFdes * 8;
+}
+
+// Records one (PC, FDE address) pair for the search table.
+template <class ELFT>
+void EhFrameHeader<ELFT>::addFde(uint32_t Pc, uint32_t FdeVA) {
+  Fdes.push_back({Pc, FdeVA});
+}
+
+// Empty exactly when the .eh_frame output section is empty.
+template <class ELFT> bool EhFrameHeader<ELFT>::empty() const {
+  return Out<ELFT>::EhFrame->empty();
+}
+
+// Creates the ".gnu.version_d" section.
+template <class ELFT>
+VersionDefinitionSection<ELFT>::VersionDefinitionSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t),
+                             ".gnu.version_d") {}
+
+// Name used for the first version definition (index 1): the soname if one
+// is configured, otherwise the output file name.
+static StringRef getFileDefName() {
+  if (!Config->SoName.empty())
+    return Config->SoName;
+  return Config->OutputFile;
+}
+
+// Adds the file definition name and every version name to the dynamic string
+// table, then sets the section's Link and Info fields.
+template <class ELFT> void VersionDefinitionSection<ELFT>::finalize() {
+  FileDefNameOff = In<ELFT>::DynStrTab->addString(getFileDefName());
+  for (VersionDefinition &V : Config->VersionDefinitions)
+    V.NameOff = In<ELFT>::DynStrTab->addString(V.Name);
+
+  this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex;
+
+  // sh_info should be set to the number of definitions. This fact is missing
+  // from the documentation, but confirmed by the binutils community:
+  // https://sourceware.org/ml/binutils/2014-11/msg00355.html
+  this->OutSec->Info = this->Info = getVerDefNum();
+}
+
+// Writes one Elf_Verdef immediately followed by its single Elf_Verdaux at
+// Buf. vd_next always points past this pair; the caller clears it for the
+// last definition. Index 1 is flagged as VER_FLG_BASE.
+template <class ELFT>
+void VersionDefinitionSection<ELFT>::writeOne(uint8_t *Buf, uint32_t Index,
+                                              StringRef Name, size_t NameOff) {
+  auto *Verdef = reinterpret_cast<Elf_Verdef *>(Buf);
+  Verdef->vd_version = 1;
+  Verdef->vd_cnt = 1;
+  Verdef->vd_aux = sizeof(Elf_Verdef);
+  Verdef->vd_next = sizeof(Elf_Verdef) + sizeof(Elf_Verdaux);
+  Verdef->vd_flags = (Index == 1 ? VER_FLG_BASE : 0);
+  Verdef->vd_ndx = Index;
+  Verdef->vd_hash = hashSysV(Name);
+
+  auto *Verdaux = reinterpret_cast<Elf_Verdaux *>(Buf + sizeof(Elf_Verdef));
+  Verdaux->vda_name = NameOff;
+  Verdaux->vda_next = 0;
+}
+
+// Writes the file definition first, then one definition per configured
+// version, each directly after the previous one.
+template <class ELFT>
+void VersionDefinitionSection<ELFT>::writeTo(uint8_t *Buf) {
+  writeOne(Buf, 1, getFileDefName(), FileDefNameOff);
+
+  for (VersionDefinition &V : Config->VersionDefinitions) {
+    Buf += sizeof(Elf_Verdef) + sizeof(Elf_Verdaux);
+    writeOne(Buf, V.Id, V.Name, V.NameOff);
+  }
+
+  // Need to terminate the last version definition.
+  Elf_Verdef *Verdef = reinterpret_cast<Elf_Verdef *>(Buf);
+  Verdef->vd_next = 0;
+}
+
+// One Elf_Verdef/Elf_Verdaux pair per definition.
+template <class ELFT> size_t VersionDefinitionSection<ELFT>::getSize() const {
+  return (sizeof(Elf_Verdef) + sizeof(Elf_Verdaux)) * getVerDefNum();
+}
+
+// Creates the ".gnu.version" section.
+template <class ELFT>
+VersionTableSection<ELFT>::VersionTableSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t),
+                             ".gnu.version") {}
+
+template <class ELFT> void VersionTableSection<ELFT>::finalize() {
+  this->OutSec->Entsize = this->Entsize = sizeof(Elf_Versym);
+  // As of June 2016 the GNU docs do not mention that the sh_link field
+  // should be set, but the Sun docs do. Also readelf relies on this field.
+  this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
+}
+
+// One Elf_Versym per dynamic symbol plus one leading entry.
+template <class ELFT> size_t VersionTableSection<ELFT>::getSize() const {
+  return sizeof(Elf_Versym) * (In<ELFT>::DynSymTab->getSymbols().size() + 1);
+}
+
+// Writes the version id of every dynamic symbol, in .dynsym order. The first
+// entry is skipped and not written here.
+template <class ELFT> void VersionTableSection<ELFT>::writeTo(uint8_t *Buf) {
+  auto *OutVersym = reinterpret_cast<Elf_Versym *>(Buf) + 1;
+  for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) {
+    OutVersym->vs_index = S.Symbol->symbol()->VersionId;
+    ++OutVersym;
+  }
+}
+
+// Empty when there is no version definition section and the version need
+// section is empty.
+template <class ELFT> bool VersionTableSection<ELFT>::empty() const {
+  return !In<ELFT>::VerDef && In<ELFT>::VerNeed->empty();
+}
+
+// Creates the ".gnu.version_r" section.
+template <class ELFT>
+VersionNeedSection<ELFT>::VersionNeedSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t),
+                             ".gnu.version_r") {
+  // Identifiers in verneed section start at 2 because 0 and 1 are reserved
+  // for VER_NDX_LOCAL and VER_NDX_GLOBAL.
+  // The first identifiers are reserved by the verdef section if it exists.
+  NextIndex = getVerDefNum() + 1;
+}
+
+// Assigns SS its version identifier. Symbols without a Verdef get
+// VER_NDX_GLOBAL; others share one identifier per (DSO, Verdef) pair, which
+// is allocated the first time it is seen.
+template <class ELFT>
+void VersionNeedSection<ELFT>::addSymbol(SharedSymbol<ELFT> *SS) {
+  if (!SS->Verdef) {
+    SS->symbol()->VersionId = VER_NDX_GLOBAL;
+    return;
+  }
+  SharedFile<ELFT> *F = SS->file();
+  // If we don't already know that we need an Elf_Verneed for this DSO, prepare
+  // to create one by adding it to our needed list and creating a dynstr entry
+  // for the soname.
+  if (F->VerdefMap.empty())
+    Needed.push_back({F, In<ELFT>::DynStrTab->addString(F->getSoName())});
+  typename SharedFile<ELFT>::NeededVer &NV = F->VerdefMap[SS->Verdef];
+  // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef,
+  // prepare to create one by allocating a version identifier and creating a
+  // dynstr entry for the version name.
+  if (NV.Index == 0) {
+    NV.StrTab = In<ELFT>::DynStrTab->addString(
+        SS->file()->getStringTable().data() + SS->Verdef->getAux()->vda_name);
+    NV.Index = NextIndex++;
+  }
+  SS->symbol()->VersionId = NV.Index;
+}
+
+// Writes all Elf_Verneed records followed by all Elf_Vernaux records. The
+// unconditional Verneed[-1] access after the loop assumes Needed is not
+// empty -- presumably guaranteed because empty() excludes that case; confirm
+// against the caller.
+template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) {
+  // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs.
+  auto *Verneed = reinterpret_cast<Elf_Verneed *>(Buf);
+  auto *Vernaux = reinterpret_cast<Elf_Vernaux *>(Verneed + Needed.size());
+
+  for (std::pair<SharedFile<ELFT> *, size_t> &P : Needed) {
+    // Create an Elf_Verneed for this DSO.
+    Verneed->vn_version = 1;
+    Verneed->vn_cnt = P.first->VerdefMap.size();
+    Verneed->vn_file = P.second;
+    // vn_aux is the byte distance from this Verneed to its first Vernaux.
+    Verneed->vn_aux =
+        reinterpret_cast<char *>(Vernaux) - reinterpret_cast<char *>(Verneed);
+    Verneed->vn_next = sizeof(Elf_Verneed);
+    ++Verneed;
+
+    // Create the Elf_Vernauxs for this Elf_Verneed. The loop iterates over
+    // VerdefMap, which will only contain references to needed version
+    // definitions. Each Elf_Vernaux is based on the information contained in
+    // the Elf_Verdef in the source DSO. This loop iterates over a std::map of
+    // pointers, but is deterministic because the pointers refer to Elf_Verdef
+    // data structures within a single input file.
+    for (auto &NV : P.first->VerdefMap) {
+      Vernaux->vna_hash = NV.first->vd_hash;
+      Vernaux->vna_flags = 0;
+      Vernaux->vna_other = NV.second.Index;
+      Vernaux->vna_name = NV.second.StrTab;
+      Vernaux->vna_next = sizeof(Elf_Vernaux);
+      ++Vernaux;
+    }
+
+    // Terminate this DSO's Vernaux list.
+    Vernaux[-1].vna_next = 0;
+  }
+  // Terminate the Verneed list.
+  Verneed[-1].vn_next = 0;
+}
+
+// Links the section to the dynamic string table; Info is the number of
+// needed DSOs.
+template <class ELFT> void VersionNeedSection<ELFT>::finalize() {
+  this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex;
+  this->OutSec->Info = this->Info = Needed.size();
+}
+
+// One Elf_Verneed per needed DSO plus one Elf_Vernaux per version needed
+// from it.
+template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const {
+  unsigned Size = Needed.size() * sizeof(Elf_Verneed);
+  for (const std::pair<SharedFile<ELFT> *, size_t> &P : Needed)
+    Size += P.first->VerdefMap.size() * sizeof(Elf_Vernaux);
+  return Size;
+}
+
+template <class ELFT> bool VersionNeedSection<ELFT>::empty() const {
+  return getNeedNum() == 0;
+}
+
+// Creates the writable ".rld_map" section, aligned to the target word size.
+template <class ELFT>
+MipsRldMapSection<ELFT>::MipsRldMapSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
+                             sizeof(typename ELFT::uint), ".rld_map") {}
+
+// Fills the section with the linker script filler for its name. The filler
+// value is duplicated into the upper 32 bits of a 64-bit word and getSize()
+// bytes of that word are copied out.
+template <class ELFT> void MipsRldMapSection<ELFT>::writeTo(uint8_t *Buf) {
+  // Apply filler from linker script.
+  uint64_t Filler = Script<ELFT>::X->getFiller(this->Name);
+  Filler = (Filler << 32) | Filler;
+  memcpy(Buf, &Filler, getSize());
+}
+
+// Creates the sentinel ".ARM.exidx" section.
+template <class ELFT>
+ARMExidxSentinelSection<ELFT>::ARMExidxSentinelSection()
+    : SyntheticSection<ELFT>(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX,
+                             sizeof(typename ELFT::uint), ".ARM.exidx") {}
+
+// Write a terminating sentinel entry to the end of the .ARM.exidx table.
+// This section will have been sorted last in the .ARM.exidx table.
+// This table entry will have the form:
+// | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND |
+// The upper bound is the end address of the section that the preceding
+// .ARM.exidx input section is link-ordered against.
+template <class ELFT>
+void ARMExidxSentinelSection<ELFT>::writeTo(uint8_t *Buf) {
+  // Get the InputSection before us, we are by definition last
+  auto RI = cast<OutputSection<ELFT>>(this->OutSec)->Sections.rbegin();
+  // Step from the last section (this sentinel) to the one before it.
+  InputSection<ELFT> *LE = *(++RI);
+  InputSection<ELFT> *LC = cast<InputSection<ELFT>>(LE->getLinkOrderDep());
+  // S: address just past the end of LC. P: address of this entry.
+  uint64_t S = LC->OutSec->Addr + LC->getOffset(LC->getSize());
+  uint64_t P = this->getVA();
+  Target->relocateOne(Buf, R_ARM_PREL31, S - P);
+  // Second word of the entry: the EXIDX_CANTUNWIND marker named above.
+  write32le(Buf + 4, 0x1);
+}
+
+// Explicit instantiations for ELF32LE, ELF32BE, ELF64LE and ELF64BE.
+template InputSection<ELF32LE> *elf::createCommonSection();
+template InputSection<ELF32BE> *elf::createCommonSection();
+template InputSection<ELF64LE> *elf::createCommonSection();
+template InputSection<ELF64BE> *elf::createCommonSection();
+
+template InputSection<ELF32LE> *elf::createInterpSection();
+template InputSection<ELF32BE> *elf::createInterpSection();
+template InputSection<ELF64LE> *elf::createInterpSection();
+template InputSection<ELF64BE> *elf::createInterpSection();
+
+template MergeInputSection<ELF32LE> *elf::createCommentSection();
+template MergeInputSection<ELF32BE> *elf::createCommentSection();
+template MergeInputSection<ELF64LE> *elf::createCommentSection();
+template MergeInputSection<ELF64BE> *elf::createCommentSection();
+
+template class elf::MipsAbiFlagsSection<ELF32LE>;
+template class elf::MipsAbiFlagsSection<ELF32BE>;
+template class elf::MipsAbiFlagsSection<ELF64LE>;
+template class elf::MipsAbiFlagsSection<ELF64BE>;
+
+template class elf::MipsOptionsSection<ELF32LE>;
+template class elf::MipsOptionsSection<ELF32BE>;
+template class elf::MipsOptionsSection<ELF64LE>;
+template class elf::MipsOptionsSection<ELF64BE>;
+
+template class elf::MipsReginfoSection<ELF32LE>;
+template class elf::MipsReginfoSection<ELF32BE>;
+template class elf::MipsReginfoSection<ELF64LE>;
+template class elf::MipsReginfoSection<ELF64BE>;
+
+// Explicit instantiations of the synthetic section class templates, one per
+// ELFT: ELF32LE, ELF32BE, ELF64LE and ELF64BE.
+template class elf::BuildIdSection<ELF32LE>;
+template class elf::BuildIdSection<ELF32BE>;
+template class elf::BuildIdSection<ELF64LE>;
+template class elf::BuildIdSection<ELF64BE>;
+
+template class elf::GotSection<ELF32LE>;
+template class elf::GotSection<ELF32BE>;
+template class elf::GotSection<ELF64LE>;
+template class elf::GotSection<ELF64BE>;
+
+template class elf::MipsGotSection<ELF32LE>;
+template class elf::MipsGotSection<ELF32BE>;
+template class elf::MipsGotSection<ELF64LE>;
+template class elf::MipsGotSection<ELF64BE>;
+
+template class elf::GotPltSection<ELF32LE>;
+template class elf::GotPltSection<ELF32BE>;
+template class elf::GotPltSection<ELF64LE>;
+template class elf::GotPltSection<ELF64BE>;
+
+template class elf::IgotPltSection<ELF32LE>;
+template class elf::IgotPltSection<ELF32BE>;
+template class elf::IgotPltSection<ELF64LE>;
+template class elf::IgotPltSection<ELF64BE>;
+
+template class elf::StringTableSection<ELF32LE>;
+template class elf::StringTableSection<ELF32BE>;
+template class elf::StringTableSection<ELF64LE>;
+template class elf::StringTableSection<ELF64BE>;
+
+template class elf::DynamicSection<ELF32LE>;
+template class elf::DynamicSection<ELF32BE>;
+template class elf::DynamicSection<ELF64LE>;
+template class elf::DynamicSection<ELF64BE>;
+
+template class elf::RelocationSection<ELF32LE>;
+template class elf::RelocationSection<ELF32BE>;
+template class elf::RelocationSection<ELF64LE>;
+template class elf::RelocationSection<ELF64BE>;
+
+template class elf::SymbolTableSection<ELF32LE>;
+template class elf::SymbolTableSection<ELF32BE>;
+template class elf::SymbolTableSection<ELF64LE>;
+template class elf::SymbolTableSection<ELF64BE>;
+
+template class elf::GnuHashTableSection<ELF32LE>;
+template class elf::GnuHashTableSection<ELF32BE>;
+template class elf::GnuHashTableSection<ELF64LE>;
+template class elf::GnuHashTableSection<ELF64BE>;
+
+template class elf::HashTableSection<ELF32LE>;
+template class elf::HashTableSection<ELF32BE>;
+template class elf::HashTableSection<ELF64LE>;
+template class elf::HashTableSection<ELF64BE>;
+
+template class elf::PltSection<ELF32LE>;
+template class elf::PltSection<ELF32BE>;
+template class elf::PltSection<ELF64LE>;
+template class elf::PltSection<ELF64BE>;
+
+template class elf::IpltSection<ELF32LE>;
+template class elf::IpltSection<ELF32BE>;
+template class elf::IpltSection<ELF64LE>;
+template class elf::IpltSection<ELF64BE>;
+
+template class elf::GdbIndexSection<ELF32LE>;
+template class elf::GdbIndexSection<ELF32BE>;
+template class elf::GdbIndexSection<ELF64LE>;
+template class elf::GdbIndexSection<ELF64BE>;
+
+template class elf::EhFrameHeader<ELF32LE>;
+template class elf::EhFrameHeader<ELF32BE>;
+template class elf::EhFrameHeader<ELF64LE>;
+template class elf::EhFrameHeader<ELF64BE>;
+
+template class elf::VersionTableSection<ELF32LE>;
+template class elf::VersionTableSection<ELF32BE>;
+template class elf::VersionTableSection<ELF64LE>;
+template class elf::VersionTableSection<ELF64BE>;
+
+template class elf::VersionNeedSection<ELF32LE>;
+template class elf::VersionNeedSection<ELF32BE>;
+template class elf::VersionNeedSection<ELF64LE>;
+template class elf::VersionNeedSection<ELF64BE>;
+
+template class elf::VersionDefinitionSection<ELF32LE>;
+template class elf::VersionDefinitionSection<ELF32BE>;
+template class elf::VersionDefinitionSection<ELF64LE>;
+template class elf::VersionDefinitionSection<ELF64BE>;
+
+template class elf::MipsRldMapSection<ELF32LE>;
+template class elf::MipsRldMapSection<ELF32BE>;
+template class elf::MipsRldMapSection<ELF64LE>;
+template class elf::MipsRldMapSection<ELF64BE>;
+
+template class elf::ARMExidxSentinelSection<ELF32LE>;
+template class elf::ARMExidxSentinelSection<ELF32BE>;
+template class elf::ARMExidxSentinelSection<ELF64LE>;
+template class elf::ARMExidxSentinelSection<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
new file mode 100644
index 000000000000..dfefb3821e75
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
@@ -0,0 +1,747 @@
+//===- SyntheticSection.h ---------------------------------------*- C++ -*-===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_SYNTHETIC_SECTION_H
+#define LLD_ELF_SYNTHETIC_SECTION_H
+
+#include "GdbIndex.h"
+#include "InputSection.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/StringTableBuilder.h"
+
+namespace lld {
+namespace elf {
+
+// Base class of the synthetic sections declared in this header. It is an
+// InputSection of kind InputSectionData::Synthetic with no backing data (an
+// empty ArrayRef) that is marked live on construction. Subclasses provide
+// the contents through writeTo() and the size through getSize().
+template <class ELFT> class SyntheticSection : public InputSection<ELFT> {
+  typedef typename ELFT::uint uintX_t;
+
+public:
+  SyntheticSection(uintX_t Flags, uint32_t Type, uintX_t Addralign,
+                   StringRef Name)
+      : InputSection<ELFT>(Flags, Type, Addralign, ArrayRef<uint8_t>(), Name,
+                           InputSectionData::Synthetic) {
+    this->Live = true;
+  }
+
+  virtual ~SyntheticSection() = default;
+  // Writes the section contents to Buf.
+  virtual void writeTo(uint8_t *Buf) = 0;
+  // Returns the size of the section contents in bytes.
+  virtual size_t getSize() const = 0;
+  // Optional hook; does nothing by default.
+  virtual void finalize() {}
+  // Sections are non-empty by default; subclasses may override.
+  virtual bool empty() const { return false; }
+
+  // Address of this section: output section address plus this section's
+  // offset in it, or 0 when no output section is assigned.
+  uintX_t getVA() const {
+    return this->OutSec ? this->OutSec->Addr + this->OutSecOff : 0;
+  }
+
+  // Supports isa/cast/dyn_cast on InputSectionData.
+  static bool classof(const InputSectionData *D) {
+    return D->kind() == InputSectionData::Synthetic;
+  }
+};
+
+// Synthetic GOT section (see MipsGotSection for the MIPS variant).
+template <class ELFT> class GotSection final : public SyntheticSection<ELFT> {
+  typedef typename ELFT::uint uintX_t;
+
+public:
+  GotSection();
+  void writeTo(uint8_t *Buf) override;
+  size_t getSize() const override { return Size; }
+  void finalize() override;
+  bool empty() const override;
+
+  void addEntry(SymbolBody &Sym);
+  bool addDynTlsEntry(SymbolBody &Sym);
+  bool addTlsIndex();
+  uintX_t getGlobalDynAddr(const SymbolBody &B) const;
+  uintX_t getGlobalDynOffset(const SymbolBody &B) const;
+
+  // Address of the TLS index entry: section address plus TlsIndexOff.
+  uintX_t getTlsIndexVA() { return this->getVA() + TlsIndexOff; }
+  uint32_t getTlsIndexOff() const { return TlsIndexOff; }
+
+  // Flag to force GOT to be in output if we have relocations
+  // that rely on its address.
+  bool HasGotOffRel = false;
+
+private:
+  size_t NumEntries = 0;
+  // Initialized to -1, i.e. 0xffffffff -- presumably meaning "no TLS index
+  // allocated yet"; confirm against addTlsIndex().
+  uint32_t TlsIndexOff = -1;
+  uintX_t Size = 0;
+};
+
+// .note.gnu.build-id section.
+template <class ELFT> class BuildIdSection : public SyntheticSection<ELFT> {
+  // First 16 bytes are a header.
+  static const unsigned HeaderSize = 16;
+
+public:
+  BuildIdSection();
+  void writeTo(uint8_t *Buf) override;
+  // The header is followed by HashSize bytes.
+  size_t getSize() const override { return HeaderSize + HashSize; }
+  void writeBuildId(llvm::ArrayRef<uint8_t> Buf);
+
+private:
+  void computeHash(llvm::ArrayRef<uint8_t> Buf,
+                   std::function<void(uint8_t *, ArrayRef<uint8_t>)> Hash);
+
+  // Neither member has an in-class initializer; they must be set elsewhere
+  // before use.
+  size_t HashSize;
+  uint8_t *HashBuf;
+};
+
+template <class ELFT>
+class MipsGotSection final : public SyntheticSection<ELFT> {
+  typedef typename ELFT::uint uintX_t;
+
+public:
+  MipsGotSection();
+  void writeTo(uint8_t *Buf) override;
+  size_t getSize() const override { return Size; }
+  void finalize() override;
+  bool empty() const override;
+  void addEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr);
+  bool addDynTlsEntry(SymbolBody &Sym);
+  bool addTlsIndex();
+  uintX_t getPageEntryOffset(const SymbolBody &B, uintX_t Addend) const;
+  uintX_t getBodyEntryOffset(const SymbolBody &B, uintX_t Addend) const;
+  uintX_t getGlobalDynOffset(const SymbolBody &B) const;
+
+  // Returns the symbol which corresponds to the first entry of the global part
+  // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic
+  // table properties.
+  // Returns nullptr if the global part is empty.
+  const SymbolBody *getFirstGlobalEntry() const;
+
+  // Returns the number of entries in the local part of GOT including
+  // the number of reserved entries.
+  unsigned getLocalEntriesNum() const;
+
+  // Returns offset of TLS part of the MIPS GOT table. This part goes
+  // after 'local' and 'global' entries.
+  uintX_t getTlsOffset() const;
+
+  uint32_t getTlsIndexOff() const { return TlsIndexOff; }
+
+  uintX_t getGp() const;
+
+private:
+  // MIPS GOT consists of three parts: local, global and tls. Each part
+  // contains different types of entries. 
Here is a layout of GOT: + // - Header entries | + // - Page entries | Local part + // - Local entries (16-bit access) | + // - Local entries (32-bit access) | + // - Normal global entries || Global part + // - Reloc-only global entries || + // - TLS entries ||| TLS part + // + // Header: + // Two entries hold predefined value 0x0 and 0x80000000. + // Page entries: + // These entries created by R_MIPS_GOT_PAGE relocation and R_MIPS_GOT16 + // relocation against local symbols. They are initialized by higher 16-bit + // of the corresponding symbol's value. So each 64kb of address space + // requires a single GOT entry. + // Local entries (16-bit access): + // These entries created by GOT relocations against global non-preemptible + // symbols so dynamic linker is not necessary to resolve the symbol's + // values. "16-bit access" means that corresponding relocations address + // GOT using 16-bit index. Each unique Symbol-Addend pair has its own + // GOT entry. + // Local entries (32-bit access): + // These entries are the same as above but created by relocations which + // address GOT using 32-bit index (R_MIPS_GOT_HI16/LO16 etc). + // Normal global entries: + // These entries created by GOT relocations against preemptible global + // symbols. They need to be initialized by dynamic linker and they ordered + // exactly as the corresponding entries in the dynamic symbols table. + // Reloc-only global entries: + // These entries created for symbols that are referenced by dynamic + // relocations R_MIPS_REL32. These entries are not accessed with gp-relative + // addressing, but MIPS ABI requires that these entries be present in GOT. + // TLS entries: + // Entries created by TLS relocations. + + // Number of "Header" entries. + static const unsigned HeaderEntriesNum = 2; + // Number of allocated "Page" entries. + uint32_t PageEntriesNum = 0; + // Map output sections referenced by MIPS GOT relocations + // to the first index of "Page" entries allocated for this section. 
+ llvm::SmallMapVector<const OutputSectionBase *, size_t, 16> PageIndexMap; + + typedef std::pair<const SymbolBody *, uintX_t> GotEntry; + typedef std::vector<GotEntry> GotEntries; + // Map from Symbol-Addend pair to the GOT index. + llvm::DenseMap<GotEntry, size_t> EntryIndexMap; + // Local entries (16-bit access). + GotEntries LocalEntries; + // Local entries (32-bit access). + GotEntries LocalEntries32; + + // Normal and reloc-only global entries. + GotEntries GlobalEntries; + + // TLS entries. + std::vector<const SymbolBody *> TlsEntries; + + uint32_t TlsIndexOff = -1; + uintX_t Size = 0; +}; + +template <class ELFT> +class GotPltSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + GotPltSection(); + void addEntry(SymbolBody &Sym); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override { return Entries.empty(); } + +private: + std::vector<const SymbolBody *> Entries; +}; + +// The IgotPltSection is a Got associated with the IpltSection for GNU Ifunc +// Symbols that will be relocated by Target->IRelativeRel. +// On most Targets the IgotPltSection will immediately follow the GotPltSection +// on ARM the IgotPltSection will immediately follow the GotSection. 
+template <class ELFT> +class IgotPltSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + IgotPltSection(); + void addEntry(SymbolBody &Sym); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override { return Entries.empty(); } + +private: + std::vector<const SymbolBody *> Entries; +}; + +template <class ELFT> +class StringTableSection final : public SyntheticSection<ELFT> { +public: + typedef typename ELFT::uint uintX_t; + StringTableSection(StringRef Name, bool Dynamic); + unsigned addString(StringRef S, bool HashIt = true); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return Size; } + bool isDynamic() const { return Dynamic; } + +private: + const bool Dynamic; + + // ELF string tables start with a NUL byte, so 1. + uintX_t Size = 1; + + llvm::DenseMap<StringRef, unsigned> StringMap; + std::vector<StringRef> Strings; +}; + +template <class ELFT> class DynamicReloc { + typedef typename ELFT::uint uintX_t; + +public: + DynamicReloc(uint32_t Type, const InputSectionBase<ELFT> *InputSec, + uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, + uintX_t Addend) + : Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec), + UseSymVA(UseSymVA), Addend(Addend) {} + + DynamicReloc(uint32_t Type, const OutputSectionBase *OutputSec, + uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, + uintX_t Addend) + : Type(Type), Sym(Sym), OutputSec(OutputSec), OffsetInSec(OffsetInSec), + UseSymVA(UseSymVA), Addend(Addend) {} + + uintX_t getOffset() const; + uintX_t getAddend() const; + uint32_t getSymIndex() const; + const OutputSectionBase *getOutputSec() const { return OutputSec; } + const InputSectionBase<ELFT> *getInputSec() const { return InputSec; } + + uint32_t Type; + +private: + SymbolBody *Sym; + const InputSectionBase<ELFT> *InputSec = nullptr; + const OutputSectionBase *OutputSec = nullptr; + uintX_t OffsetInSec; + bool UseSymVA; + uintX_t 
Addend; +}; + +template <class ELFT> +class DynamicSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Dyn Elf_Dyn; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::uint uintX_t; + + // The .dynamic section contains information for the dynamic linker. + // The section consists of fixed size entries, which consist of + // type and value fields. Value are one of plain integers, symbol + // addresses, or section addresses. This struct represents the entry. + struct Entry { + int32_t Tag; + union { + OutputSectionBase *OutSec; + InputSection<ELFT> *InSec; + uint64_t Val; + const SymbolBody *Sym; + }; + enum KindT { SecAddr, SecSize, SymAddr, PlainInt, InSecAddr } Kind; + Entry(int32_t Tag, OutputSectionBase *OutSec, KindT Kind = SecAddr) + : Tag(Tag), OutSec(OutSec), Kind(Kind) {} + Entry(int32_t Tag, InputSection<ELFT> *Sec) + : Tag(Tag), InSec(Sec), Kind(InSecAddr) {} + Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {} + Entry(int32_t Tag, const SymbolBody *Sym) + : Tag(Tag), Sym(Sym), Kind(SymAddr) {} + }; + + // finalize() fills this vector with the section contents. finalize() + // cannot directly create final section contents because when the + // function is called, symbol or section addresses are not fixed yet. 
+ std::vector<Entry> Entries; + +public: + DynamicSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return Size; } + +private: + void addEntries(); + void add(Entry E) { Entries.push_back(E); } + uintX_t Size = 0; +}; + +template <class ELFT> +class RelocationSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::uint uintX_t; + +public: + RelocationSection(StringRef Name, bool Sort); + void addReloc(const DynamicReloc<ELFT> &Reloc); + unsigned getRelocOffset(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + bool empty() const override { return Relocs.empty(); } + size_t getSize() const override { return Relocs.size() * this->Entsize; } + size_t getRelativeRelocCount() const { return NumRelativeRelocs; } + +private: + bool Sort; + size_t NumRelativeRelocs = 0; + std::vector<DynamicReloc<ELFT>> Relocs; +}; + +struct SymbolTableEntry { + SymbolBody *Symbol; + size_t StrTabOffset; +}; + +template <class ELFT> +class SymbolTableSection final : public SyntheticSection<ELFT> { +public: + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::SymRange Elf_Sym_Range; + typedef typename ELFT::uint uintX_t; + SymbolTableSection(StringTableSection<ELFT> &StrTabSec); + + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return getNumSymbols() * sizeof(Elf_Sym); } + void addSymbol(SymbolBody *Body); + StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; } + unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; } + + ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; } + + unsigned NumLocals = 0; + StringTableSection<ELFT> &StrTabSec; + +private: + void writeLocalSymbols(uint8_t *&Buf); + void writeGlobalSymbols(uint8_t *Buf); + + const OutputSectionBase 
*getOutputSection(SymbolBody *Sym); + + // A vector of symbols and their string table offsets. + std::vector<SymbolTableEntry> Symbols; +}; + +// Outputs GNU Hash section. For detailed explanation see: +// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections +template <class ELFT> +class GnuHashTableSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Off Elf_Off; + typedef typename ELFT::Word Elf_Word; + typedef typename ELFT::uint uintX_t; + +public: + GnuHashTableSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return this->Size; } + + // Adds symbols to the hash table. + // Sorts the input to satisfy GNU hash section requirements. + void addSymbols(std::vector<SymbolTableEntry> &Symbols); + +private: + static unsigned calcNBuckets(unsigned NumHashed); + static unsigned calcMaskWords(unsigned NumHashed); + + void writeHeader(uint8_t *&Buf); + void writeBloomFilter(uint8_t *&Buf); + void writeHashTable(uint8_t *Buf); + + struct SymbolData { + SymbolBody *Body; + size_t STName; + uint32_t Hash; + }; + + std::vector<SymbolData> Symbols; + + unsigned MaskWords; + unsigned NBuckets; + unsigned Shift2; + uintX_t Size = 0; +}; + +template <class ELFT> +class HashTableSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Word Elf_Word; + +public: + HashTableSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override { return this->Size; } + +private: + size_t Size = 0; +}; + +template <class ELFT> class PltSection final : public SyntheticSection<ELFT> { +public: + PltSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + void addEntry(SymbolBody &Sym); + bool empty() const override { return Entries.empty(); } + +private: + std::vector<std::pair<const SymbolBody *, unsigned>> Entries; +}; + +// The IpltSection is a variant of Plt for recording entries for GNU Ifunc +// symbols that 
will be subject to a Target->IRelativeRel +// The IpltSection immediately follows the Plt section in the Output Section +template <class ELFT> class IpltSection final : public SyntheticSection<ELFT> { +public: + IpltSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + void addEntry(SymbolBody &Sym); + bool empty() const override { return Entries.empty(); } + +private: + std::vector<std::pair<const SymbolBody *, unsigned>> Entries; +}; + +template <class ELFT> +class GdbIndexSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + + const unsigned OffsetTypeSize = 4; + const unsigned CuListOffset = 6 * OffsetTypeSize; + const unsigned CompilationUnitSize = 16; + const unsigned AddressEntrySize = 16 + OffsetTypeSize; + const unsigned SymTabEntrySize = 2 * OffsetTypeSize; + +public: + GdbIndexSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + bool empty() const override; + + // Pairs of [CU Offset, CU length]. + std::vector<std::pair<uintX_t, uintX_t>> CompilationUnits; + + llvm::StringTableBuilder StringPool; + + GdbHashTab SymbolTable; + + // The CU vector portion of the constant pool. + std::vector<std::vector<std::pair<uint32_t, uint8_t>>> CuVectors; + + std::vector<AddressEntry<ELFT>> AddressArea; + +private: + void parseDebugSections(); + void readDwarf(InputSection<ELFT> *I); + + uint32_t CuTypesOffset; + uint32_t SymTabOffset; + uint32_t ConstantPoolOffset; + uint32_t StringPoolOffset; + + size_t CuVectorsSize = 0; + std::vector<size_t> CuVectorsOffset; + + bool Finalized = false; +}; + +// --eh-frame-hdr option tells linker to construct a header for all the +// .eh_frame sections. This header is placed to a section named .eh_frame_hdr +// and also to a PT_GNU_EH_FRAME segment. +// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by +// calling dl_iterate_phdr. 
+// This section contains a lookup table for quick binary search of FDEs. +// Detailed info about internals can be found in Ian Lance Taylor's blog: +// http://www.airs.com/blog/archives/460 (".eh_frame") +// http://www.airs.com/blog/archives/462 (".eh_frame_hdr") +template <class ELFT> +class EhFrameHeader final : public SyntheticSection<ELFT> { + typedef typename ELFT::uint uintX_t; + +public: + EhFrameHeader(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + void addFde(uint32_t Pc, uint32_t FdeVA); + bool empty() const override; + +private: + struct FdeData { + uint32_t Pc; + uint32_t FdeVA; + }; + + std::vector<FdeData> Fdes; +}; + +// For more information about .gnu.version and .gnu.version_r see: +// https://www.akkadia.org/drepper/symbol-versioning + +// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall +// contain symbol version definitions. The number of entries in this section +// shall be contained in the DT_VERDEFNUM entry of the .dynamic section. +// The section shall contain an array of Elf_Verdef structures, optionally +// followed by an array of Elf_Verdaux structures. +template <class ELFT> +class VersionDefinitionSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Verdaux Elf_Verdaux; + +public: + VersionDefinitionSection(); + void finalize() override; + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + +private: + void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff); + + unsigned FileDefNameOff; +}; + +// The .gnu.version section specifies the required version of each symbol in the +// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol +// table entry. An Elf_Versym is just a 16-bit integer that refers to a version +// identifier defined in the either .gnu.version_r or .gnu.version_d section. +// The values 0 and 1 are reserved. 
All other values are used for versions in +// the own object or in any of the dependencies. +template <class ELFT> +class VersionTableSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Versym Elf_Versym; + +public: + VersionTableSection(); + void finalize() override; + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override; +}; + +// The .gnu.version_r section defines the version identifiers used by +// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each +// Elf_Verneed specifies the version requirements for a single DSO, and contains +// a reference to a linked list of Elf_Vernaux data structures which define the +// mapping from version identifiers to version names. +template <class ELFT> +class VersionNeedSection final : public SyntheticSection<ELFT> { + typedef typename ELFT::Verneed Elf_Verneed; + typedef typename ELFT::Vernaux Elf_Vernaux; + + // A vector of shared files that need Elf_Verneed data structures and the + // string table offsets of their sonames. + std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed; + + // The next available version identifier. + unsigned NextIndex; + +public: + VersionNeedSection(); + void addSymbol(SharedSymbol<ELFT> *SS); + void finalize() override; + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; + size_t getNeedNum() const { return Needed.size(); } + bool empty() const override; +}; + +// .MIPS.abiflags section. +template <class ELFT> +class MipsAbiFlagsSection final : public SyntheticSection<ELFT> { + typedef llvm::object::Elf_Mips_ABIFlags<ELFT> Elf_Mips_ABIFlags; + +public: + static MipsAbiFlagsSection *create(); + + MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags); + size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); } + void writeTo(uint8_t *Buf) override; + +private: + Elf_Mips_ABIFlags Flags; +}; + +// .MIPS.options section. 
+template <class ELFT> +class MipsOptionsSection final : public SyntheticSection<ELFT> { + typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options; + typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; + +public: + static MipsOptionsSection *create(); + + MipsOptionsSection(Elf_Mips_RegInfo Reginfo); + void writeTo(uint8_t *Buf) override; + + size_t getSize() const override { + return sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); + } + +private: + Elf_Mips_RegInfo Reginfo; +}; + +// MIPS .reginfo section. +template <class ELFT> +class MipsReginfoSection final : public SyntheticSection<ELFT> { + typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; + +public: + static MipsReginfoSection *create(); + + MipsReginfoSection(Elf_Mips_RegInfo Reginfo); + size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); } + void writeTo(uint8_t *Buf) override; + +private: + Elf_Mips_RegInfo Reginfo; +}; + +// This is a MIPS specific section to hold a space within the data segment +// of executable file which is pointed to by the DT_MIPS_RLD_MAP entry. +// See "Dynamic section" in Chapter 5 in the following document: +// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf +template <class ELFT> class MipsRldMapSection : public SyntheticSection<ELFT> { +public: + MipsRldMapSection(); + size_t getSize() const override { return sizeof(typename ELFT::uint); } + void writeTo(uint8_t *Buf) override; +}; + +template <class ELFT> class ARMExidxSentinelSection : public SyntheticSection<ELFT> { +public: + ARMExidxSentinelSection(); + size_t getSize() const override { return 8; } + void writeTo(uint8_t *Buf) override; +}; + +template <class ELFT> InputSection<ELFT> *createCommonSection(); +template <class ELFT> InputSection<ELFT> *createInterpSection(); +template <class ELFT> MergeInputSection<ELFT> *createCommentSection(); + +// Linker generated sections which can be used as inputs. 
+template <class ELFT> struct In { + static InputSection<ELFT> *ARMAttributes; + static BuildIdSection<ELFT> *BuildId; + static InputSection<ELFT> *Common; + static DynamicSection<ELFT> *Dynamic; + static StringTableSection<ELFT> *DynStrTab; + static SymbolTableSection<ELFT> *DynSymTab; + static EhFrameHeader<ELFT> *EhFrameHdr; + static GnuHashTableSection<ELFT> *GnuHashTab; + static GdbIndexSection<ELFT> *GdbIndex; + static GotSection<ELFT> *Got; + static MipsGotSection<ELFT> *MipsGot; + static GotPltSection<ELFT> *GotPlt; + static IgotPltSection<ELFT> *IgotPlt; + static HashTableSection<ELFT> *HashTab; + static InputSection<ELFT> *Interp; + static MipsRldMapSection<ELFT> *MipsRldMap; + static PltSection<ELFT> *Plt; + static IpltSection<ELFT> *Iplt; + static RelocationSection<ELFT> *RelaDyn; + static RelocationSection<ELFT> *RelaPlt; + static RelocationSection<ELFT> *RelaIplt; + static StringTableSection<ELFT> *ShStrTab; + static StringTableSection<ELFT> *StrTab; + static SymbolTableSection<ELFT> *SymTab; + static VersionDefinitionSection<ELFT> *VerDef; + static VersionTableSection<ELFT> *VerSym; + static VersionNeedSection<ELFT> *VerNeed; +}; + +template <class ELFT> InputSection<ELFT> *In<ELFT>::ARMAttributes; +template <class ELFT> BuildIdSection<ELFT> *In<ELFT>::BuildId; +template <class ELFT> InputSection<ELFT> *In<ELFT>::Common; +template <class ELFT> DynamicSection<ELFT> *In<ELFT>::Dynamic; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::DynStrTab; +template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::DynSymTab; +template <class ELFT> EhFrameHeader<ELFT> *In<ELFT>::EhFrameHdr; +template <class ELFT> GdbIndexSection<ELFT> *In<ELFT>::GdbIndex; +template <class ELFT> GnuHashTableSection<ELFT> *In<ELFT>::GnuHashTab; +template <class ELFT> GotSection<ELFT> *In<ELFT>::Got; +template <class ELFT> MipsGotSection<ELFT> *In<ELFT>::MipsGot; +template <class ELFT> GotPltSection<ELFT> *In<ELFT>::GotPlt; +template <class ELFT> IgotPltSection<ELFT> 
*In<ELFT>::IgotPlt; +template <class ELFT> HashTableSection<ELFT> *In<ELFT>::HashTab; +template <class ELFT> InputSection<ELFT> *In<ELFT>::Interp; +template <class ELFT> MipsRldMapSection<ELFT> *In<ELFT>::MipsRldMap; +template <class ELFT> PltSection<ELFT> *In<ELFT>::Plt; +template <class ELFT> IpltSection<ELFT> *In<ELFT>::Iplt; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaDyn; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaPlt; +template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaIplt; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::ShStrTab; +template <class ELFT> StringTableSection<ELFT> *In<ELFT>::StrTab; +template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::SymTab; +template <class ELFT> VersionDefinitionSection<ELFT> *In<ELFT>::VerDef; +template <class ELFT> VersionTableSection<ELFT> *In<ELFT>::VerSym; +template <class ELFT> VersionNeedSection<ELFT> *In<ELFT>::VerNeed; +} // namespace elf +} // namespace lld + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp index 0375eb96dc3e..edae7c65c1b4 100644 --- a/contrib/llvm/tools/lld/ELF/Target.cpp +++ b/contrib/llvm/tools/lld/ELF/Target.cpp @@ -10,7 +10,7 @@ // Machine-specific things, such as applying relocations, creation of // GOT or PLT entries, etc., are handled in this file. // -// Refer the ELF spec for the single letter varaibles, S, A or P, used +// Refer the ELF spec for the single letter variables, S, A or P, used // in this file. // // Some functions defined in this file has "relaxTls" as part of their names. 
@@ -27,14 +27,17 @@ #include "Target.h" #include "Error.h" #include "InputFiles.h" +#include "Memory.h" #include "OutputSections.h" +#include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "Thunks.h" - +#include "Writer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/ELF.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::object; @@ -47,34 +50,66 @@ namespace elf { TargetInfo *Target; static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); } +static void or32be(uint8_t *P, int32_t V) { write32be(P, read32be(P) | V); } -StringRef getRelName(uint32_t Type) { +std::string toString(uint32_t Type) { return getELFRelocationTypeName(Config->EMachine, Type); } -template <unsigned N> static void checkInt(int64_t V, uint32_t Type) { +template <class ELFT> static std::string getErrorLoc(uint8_t *Loc) { + for (InputSectionData *D : Symtab<ELFT>::X->Sections) { + auto *IS = dyn_cast_or_null<InputSection<ELFT>>(D); + if (!IS || !IS->OutSec) + continue; + + uint8_t *ISLoc = cast<OutputSection<ELFT>>(IS->OutSec)->Loc + IS->OutSecOff; + if (ISLoc <= Loc && Loc < ISLoc + IS->getSize()) + return IS->getLocation(Loc - ISLoc) + ": "; + } + return ""; +} + +static std::string getErrorLocation(uint8_t *Loc) { + switch (Config->EKind) { + case ELF32LEKind: + return getErrorLoc<ELF32LE>(Loc); + case ELF32BEKind: + return getErrorLoc<ELF32BE>(Loc); + case ELF64LEKind: + return getErrorLoc<ELF64LE>(Loc); + case ELF64BEKind: + return getErrorLoc<ELF64BE>(Loc); + default: + llvm_unreachable("unknown ELF type"); + } +} + +template <unsigned N> +static void checkInt(uint8_t *Loc, int64_t V, uint32_t Type) { if (!isInt<N>(V)) - error("relocation " + getRelName(Type) + " out of range"); + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); } -template <unsigned N> static void checkUInt(uint64_t V, uint32_t Type) { 
+template <unsigned N> +static void checkUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { if (!isUInt<N>(V)) - error("relocation " + getRelName(Type) + " out of range"); + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); } -template <unsigned N> static void checkIntUInt(uint64_t V, uint32_t Type) { +template <unsigned N> +static void checkIntUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { if (!isInt<N>(V) && !isUInt<N>(V)) - error("relocation " + getRelName(Type) + " out of range"); + error(getErrorLocation(Loc) + "relocation " + toString(Type) + + " out of range"); } -template <unsigned N> static void checkAlignment(uint64_t V, uint32_t Type) { +template <unsigned N> +static void checkAlignment(uint8_t *Loc, uint64_t V, uint32_t Type) { if ((V & (N - 1)) != 0) - error("improper alignment for relocation " + getRelName(Type)); -} - -static void errorDynRel(uint32_t Type) { - error("relocation " + getRelName(Type) + - " cannot be used against shared object; recompile with -fPIC."); + error(getErrorLocation(Loc) + "improper alignment for relocation " + + toString(Type)); } namespace { @@ -89,6 +124,7 @@ public: bool isTlsGlobalDynamicRel(uint32_t Type) const override; bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; @@ -106,7 +142,7 @@ template <class ELFT> class X86_64TargetInfo final : public TargetInfo { public: X86_64TargetInfo(); RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; - uint32_t getDynRel(uint32_t Type) const override; + bool isPicRel(uint32_t Type) const override; bool isTlsLocalDynamicRel(uint32_t Type) const override; bool isTlsGlobalDynamicRel(uint32_t Type) const override; bool 
isTlsInitialExecRel(uint32_t Type) const override; @@ -150,7 +186,7 @@ class AArch64TargetInfo final : public TargetInfo { public: AArch64TargetInfo(); RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; - uint32_t getDynRel(uint32_t Type) const override; + bool isPicRel(uint32_t Type) const override; bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; @@ -176,14 +212,18 @@ class ARMTargetInfo final : public TargetInfo { public: ARMTargetInfo(); RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + bool isPicRel(uint32_t Type) const override; uint32_t getDynRel(uint32_t Type) const override; uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + bool isTlsLocalDynamicRel(uint32_t Type) const override; + bool isTlsGlobalDynamicRel(uint32_t Type) const override; + bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; + void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; - RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, - const InputFile &File, + RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; }; @@ -193,6 +233,7 @@ public: MipsTargetInfo(); RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + bool isPicRel(uint32_t Type) const override; uint32_t getDynRel(uint32_t Type) const override; bool isTlsLocalDynamicRel(uint32_t Type) const override; bool isTlsGlobalDynamicRel(uint32_t Type) const 
override; @@ -200,8 +241,7 @@ public: void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; - RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, - const InputFile &File, + RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; bool usesOnlyLowPageBits(uint32_t Type) const override; @@ -211,34 +251,35 @@ public: TargetInfo *createTarget() { switch (Config->EMachine) { case EM_386: - return new X86TargetInfo(); + case EM_IAMCU: + return make<X86TargetInfo>(); case EM_AARCH64: - return new AArch64TargetInfo(); + return make<AArch64TargetInfo>(); case EM_AMDGPU: - return new AMDGPUTargetInfo(); + return make<AMDGPUTargetInfo>(); case EM_ARM: - return new ARMTargetInfo(); + return make<ARMTargetInfo>(); case EM_MIPS: switch (Config->EKind) { case ELF32LEKind: - return new MipsTargetInfo<ELF32LE>(); + return make<MipsTargetInfo<ELF32LE>>(); case ELF32BEKind: - return new MipsTargetInfo<ELF32BE>(); + return make<MipsTargetInfo<ELF32BE>>(); case ELF64LEKind: - return new MipsTargetInfo<ELF64LE>(); + return make<MipsTargetInfo<ELF64LE>>(); case ELF64BEKind: - return new MipsTargetInfo<ELF64BE>(); + return make<MipsTargetInfo<ELF64BE>>(); default: fatal("unsupported MIPS target"); } case EM_PPC: - return new PPCTargetInfo(); + return make<PPCTargetInfo>(); case EM_PPC64: - return new PPC64TargetInfo(); + return make<PPC64TargetInfo>(); case EM_X86_64: if (Config->EKind == ELF32LEKind) - return new X86_64TargetInfo<ELF32LE>(); - return new X86_64TargetInfo<ELF64LE>(); + return make<X86_64TargetInfo<ELF32LE>>(); + return make<X86_64TargetInfo<ELF64LE>>(); } fatal("unknown target machine"); } @@ -262,8 +303,10 @@ bool TargetInfo::isTlsInitialExecRel(uint32_t Type) const { return false; } bool 
TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return false; } -bool TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { - return false; +bool TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { return false; } + +void TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + writeGotPlt(Buf, S); } RelExpr TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, @@ -321,10 +364,11 @@ RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { return R_TLSLD; case R_386_PLT32: return R_PLT_PC; + case R_386_PC16: case R_386_PC32: return R_PC; case R_386_GOTPC: - return R_GOTONLY_PC; + return R_GOTONLY_PC_FROM_END; case R_386_TLS_IE: return R_GOT; case R_386_GOT32: @@ -332,7 +376,7 @@ RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { case R_386_TLS_GOTIE: return R_GOT_FROM_END; case R_386_GOTOFF: - return R_GOTREL; + return R_GOTREL_FROM_END; case R_386_TLS_LE: return R_TLS; case R_386_TLS_LE_32: @@ -353,7 +397,7 @@ RelExpr X86TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, } void X86TargetInfo::writeGotPltHeader(uint8_t *Buf) const { - write32le(Buf, Out<ELF32LE>::Dynamic->getVA()); + write32le(Buf, In<ELF32LE>::Dynamic->getVA()); } void X86TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const { @@ -362,6 +406,11 @@ void X86TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const { write32le(Buf, S.getPltVA<ELF32LE>() + 6); } +void X86TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + // An x86 entry is the address of the ifunc resolver function. 
+ write32le(Buf, S.getVA<ELF32LE>()); +} + uint32_t X86TargetInfo::getDynRel(uint32_t Type) const { if (Type == R_386_TLS_LE) return R_386_TLS_TPOFF; @@ -401,7 +450,7 @@ void X86TargetInfo::writePltHeader(uint8_t *Buf) const { 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop }; memcpy(Buf, PltData, sizeof(PltData)); - uint32_t Got = Out<ELF32LE>::GotPlt->getVA(); + uint32_t Got = In<ELF32LE>::GotPlt->getVA(); write32le(Buf + 2, Got + 4); write32le(Buf + 8, Got + 8); } @@ -418,7 +467,7 @@ void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, // jmp *foo@GOT(%ebx) or jmp *foo_in_GOT Buf[1] = Config->Pic ? 0xa3 : 0x25; - uint32_t Got = Out<ELF32LE>::GotPlt->getVA(); + uint32_t Got = In<ELF32LE>::GotPlt->getVA(); write32le(Buf + 2, Config->Shared ? GotEntryAddr - Got : GotEntryAddr); write32le(Buf + 7, RelOff); write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); @@ -429,6 +478,9 @@ uint64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, switch (Type) { default: return 0; + case R_386_16: + case R_386_PC16: + return read16le(Buf); case R_386_32: case R_386_GOT32: case R_386_GOT32X: @@ -443,7 +495,14 @@ uint64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, void X86TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { - checkInt<32>(Val, Type); + checkInt<32>(Loc, Val, Type); + + // R_386_PC16 and R_386_16 are not part of the current i386 psABI. They are + // used by 16-bit x86 objects, like boot loaders. + if (Type == R_386_16 || Type == R_386_PC16) { + write16le(Loc, Val); + return; + } write32le(Loc, Val); } @@ -555,6 +614,9 @@ template <class ELFT> X86_64TargetInfo<ELFT>::X86_64TargetInfo() { PltEntrySize = 16; PltHeaderSize = 16; TlsGdRelaxSkip = 2; + // Align to the large page size (known as a superpage or huge page). + // FreeBSD automatically promotes large, superpage-aligned allocations. 
+ DefaultImageBase = 0x200000; } template <class ELFT> @@ -578,12 +640,15 @@ RelExpr X86_64TargetInfo<ELFT>::getRelExpr(uint32_t Type, case R_X86_64_PC64: return R_PC; case R_X86_64_GOT32: + case R_X86_64_GOT64: return R_GOT_FROM_END; case R_X86_64_GOTPCREL: case R_X86_64_GOTPCRELX: case R_X86_64_REX_GOTPCRELX: case R_X86_64_GOTTPOFF: return R_GOT_PC; + case R_X86_64_NONE: + return R_HINT; } } @@ -593,7 +658,7 @@ void X86_64TargetInfo<ELFT>::writeGotPltHeader(uint8_t *Buf) const { // required, but it is documented in the psabi and the glibc dynamic linker // seems to use it (note that this is relevant for linking ld.so, not any // other program). - write64le(Buf, Out<ELFT>::Dynamic->getVA()); + write64le(Buf, In<ELFT>::Dynamic->getVA()); } template <class ELFT> @@ -611,8 +676,8 @@ void X86_64TargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { 0x0f, 0x1f, 0x40, 0x00 // nopl 0x0(rax) }; memcpy(Buf, PltData, sizeof(PltData)); - uint64_t Got = Out<ELFT>::GotPlt->getVA(); - uint64_t Plt = Out<ELFT>::Plt->getVA(); + uint64_t Got = In<ELFT>::GotPlt->getVA(); + uint64_t Plt = In<ELFT>::Plt->getVA(); write32le(Buf + 2, Got - Plt + 2); // GOT+8 write32le(Buf + 8, Got - Plt + 4); // GOT+16 } @@ -634,10 +699,8 @@ void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, } template <class ELFT> -uint32_t X86_64TargetInfo<ELFT>::getDynRel(uint32_t Type) const { - if (Type == R_X86_64_PC32 || Type == R_X86_64_32) - errorDynRel(Type); - return Type; +bool X86_64TargetInfo<ELFT>::isPicRel(uint32_t Type) const { + return Type != R_X86_64_PC32 && Type != R_X86_64_32; } template <class ELFT> @@ -736,7 +799,8 @@ void X86_64TargetInfo<ELFT>::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, memcpy(Inst, "\x48\xc7", 2); *RegSlot = 0xc0 | Reg; } else { - fatal("R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"); + error(getErrorLocation(Loc - 3) + + "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"); } // The original code used a PC relative 
relocation. @@ -778,7 +842,7 @@ void X86_64TargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { switch (Type) { case R_X86_64_32: - checkUInt<32>(Val, Type); + checkUInt<32>(Loc, Val, Type); write32le(Loc, Val); break; case R_X86_64_32S: @@ -794,17 +858,19 @@ void X86_64TargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, case R_X86_64_TLSLD: case R_X86_64_DTPOFF32: case R_X86_64_SIZE32: - checkInt<32>(Val, Type); + checkInt<32>(Loc, Val, Type); write32le(Loc, Val); break; case R_X86_64_64: case R_X86_64_DTPOFF64: - case R_X86_64_SIZE64: + case R_X86_64_GLOB_DAT: case R_X86_64_PC64: + case R_X86_64_SIZE64: + case R_X86_64_GOT64: write64le(Loc, Val); break; default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } @@ -965,13 +1031,26 @@ void PPCTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, case R_PPC_ADDR16_LO: write16be(Loc, applyPPCLo(Val)); break; + case R_PPC_ADDR32: + case R_PPC_REL32: + write32be(Loc, Val); + break; + case R_PPC_REL24: + or32be(Loc, Val & 0x3FFFFFC); + break; default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } RelExpr PPCTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { - return R_ABS; + switch (Type) { + case R_PPC_REL24: + case R_PPC_REL32: + return R_PC; + default: + return R_ABS; + } } PPC64TargetInfo::PPC64TargetInfo() { @@ -984,7 +1063,7 @@ PPC64TargetInfo::PPC64TargetInfo() { // We need 64K pages (at least under glibc/Linux, the loader won't // set different permissions on a finer granularity than that). - PageSize = 65536; + DefaultMaxPageSize = 65536; // The PPC64 ELF ABI v1 spec, says: // @@ -1004,7 +1083,7 @@ uint64_t getPPC64TocBase() { // TOC starts where the first of these sections starts. We always create a // .got when we see a relocation that uses it, so for us the start is always // the .got. 
- uint64_t TocVA = Out<ELF64BE>::Got->getVA(); + uint64_t TocVA = In<ELF64BE>::Got->getVA(); // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 // thus permitting a full 64 Kbytes segment. Note that the glibc startup @@ -1042,26 +1121,33 @@ void PPC64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, // be a pointer to the function descriptor in the .opd section. Using // this scheme is simpler, but requires an extra indirection per PLT dispatch. - write32be(Buf, 0xf8410028); // std %r2, 40(%r1) - write32be(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha - write32be(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) - write32be(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) - write32be(Buf + 16, 0x7d6903a6); // mtctr %r11 - write32be(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) - write32be(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) - write32be(Buf + 28, 0x4e800420); // bctr + write32be(Buf, 0xf8410028); // std %r2, 40(%r1) + write32be(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha + write32be(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) + write32be(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) + write32be(Buf + 16, 0x7d6903a6); // mtctr %r11 + write32be(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) + write32be(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) + write32be(Buf + 28, 0x4e800420); // bctr } static std::pair<uint32_t, uint64_t> toAddr16Rel(uint32_t Type, uint64_t Val) { uint64_t V = Val - PPC64TocOffset; switch (Type) { - case R_PPC64_TOC16: return {R_PPC64_ADDR16, V}; - case R_PPC64_TOC16_DS: return {R_PPC64_ADDR16_DS, V}; - case R_PPC64_TOC16_HA: return {R_PPC64_ADDR16_HA, V}; - case R_PPC64_TOC16_HI: return {R_PPC64_ADDR16_HI, V}; - case R_PPC64_TOC16_LO: return {R_PPC64_ADDR16_LO, V}; - case R_PPC64_TOC16_LO_DS: return {R_PPC64_ADDR16_LO_DS, V}; - default: return {Type, Val}; + case R_PPC64_TOC16: + return {R_PPC64_ADDR16, V}; + case R_PPC64_TOC16_DS: + return {R_PPC64_ADDR16_DS, V}; + 
case R_PPC64_TOC16_HA: + return {R_PPC64_ADDR16_HA, V}; + case R_PPC64_TOC16_HI: + return {R_PPC64_ADDR16_HI, V}; + case R_PPC64_TOC16_LO: + return {R_PPC64_ADDR16_LO, V}; + case R_PPC64_TOC16_LO_DS: + return {R_PPC64_ADDR16_LO_DS, V}; + default: + return {Type, Val}; } } @@ -1073,18 +1159,18 @@ void PPC64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, switch (Type) { case R_PPC64_ADDR14: { - checkAlignment<4>(Val, Type); + checkAlignment<4>(Loc, Val, Type); // Preserve the AA/LK bits in the branch instruction uint8_t AALK = Loc[3]; write16be(Loc + 2, (AALK & 3) | (Val & 0xfffc)); break; } case R_PPC64_ADDR16: - checkInt<16>(Val, Type); + checkInt<16>(Loc, Val, Type); write16be(Loc, Val); break; case R_PPC64_ADDR16_DS: - checkInt<16>(Val, Type); + checkInt<16>(Loc, Val, Type); write16be(Loc, (read16be(Loc) & 3) | (Val & ~3)); break; case R_PPC64_ADDR16_HA: @@ -1116,7 +1202,7 @@ void PPC64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, break; case R_PPC64_ADDR32: case R_PPC64_REL32: - checkInt<32>(Val, Type); + checkInt<32>(Loc, Val, Type); write32be(Loc, Val); break; case R_PPC64_ADDR64: @@ -1126,12 +1212,12 @@ void PPC64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, break; case R_PPC64_REL24: { uint32_t Mask = 0x03FFFFFC; - checkInt<24>(Val, Type); + checkInt<24>(Loc, Val, Type); write32be(Loc, (read32be(Loc) & ~Mask) | (Val & Mask)); break; } default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } @@ -1147,6 +1233,7 @@ AArch64TargetInfo::AArch64TargetInfo() { GotPltEntrySize = 8; PltEntrySize = 16; PltHeaderSize = 32; + DefaultMaxPageSize = 65536; // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant // 1 of the tls structures and the tcb size is 16. 
@@ -1164,7 +1251,7 @@ RelExpr AArch64TargetInfo::getRelExpr(uint32_t Type, case R_AARCH64_TLSDESC_ADD_LO12_NC: return R_TLSDESC; case R_AARCH64_TLSDESC_CALL: - return R_HINT; + return R_TLSDESC_CALL; case R_AARCH64_TLSLE_ADD_TPREL_HI12: case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: return R_TLS; @@ -1222,19 +1309,18 @@ bool AArch64TargetInfo::isTlsInitialExecRel(uint32_t Type) const { Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; } -uint32_t AArch64TargetInfo::getDynRel(uint32_t Type) const { - if (Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64) - return Type; - // Keep it going with a dummy value so that we can find more reloc errors. - errorDynRel(Type); - return R_AARCH64_ABS32; +bool AArch64TargetInfo::isPicRel(uint32_t Type) const { + return Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64; } void AArch64TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { - write64le(Buf, Out<ELF64LE>::Plt->getVA()); + write64le(Buf, In<ELF64LE>::Plt->getVA()); } -static uint64_t getAArch64Page(uint64_t Expr) { +// Page(Expr) is the page address of the expression Expr, defined +// as (Expr & ~0xFFF). (This applies even if the machine page size +// supported by the platform has a different value.) 
+uint64_t getAArch64Page(uint64_t Expr) { return Expr & (~static_cast<uint64_t>(0xFFF)); } @@ -1251,8 +1337,8 @@ void AArch64TargetInfo::writePltHeader(uint8_t *Buf) const { }; memcpy(Buf, PltData, sizeof(PltData)); - uint64_t Got = Out<ELF64LE>::GotPlt->getVA(); - uint64_t Plt = Out<ELF64LE>::Plt->getVA(); + uint64_t Got = In<ELF64LE>::GotPlt->getVA(); + uint64_t Plt = In<ELF64LE>::Plt->getVA(); relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21, getAArch64Page(Got + 16) - getAArch64Page(Plt + 4)); relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16); @@ -1276,14 +1362,22 @@ void AArch64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotEntryAddr); } -static void updateAArch64Addr(uint8_t *L, uint64_t Imm) { +static void write32AArch64Addr(uint8_t *L, uint64_t Imm) { uint32_t ImmLo = (Imm & 0x3) << 29; uint32_t ImmHi = (Imm & 0x1FFFFC) << 3; uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3); write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi); } -static inline void updateAArch64Add(uint8_t *L, uint64_t Imm) { +// Return the bits [Start, End] from Val shifted Start bits. +// For instance, getBits(0xF0, 4, 8) returns 0xF. +static uint64_t getBits(uint64_t Val, int Start, int End) { + uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1; + return (Val >> Start) & Mask; +} + +// Update the immediate field in a AARCH64 ldr, str, and add instruction. 
+static void or32AArch64Imm(uint8_t *L, uint64_t Imm) { or32le(L, (Imm & 0xFFF) << 10); } @@ -1292,80 +1386,89 @@ void AArch64TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, switch (Type) { case R_AARCH64_ABS16: case R_AARCH64_PREL16: - checkIntUInt<16>(Val, Type); + checkIntUInt<16>(Loc, Val, Type); write16le(Loc, Val); break; case R_AARCH64_ABS32: case R_AARCH64_PREL32: - checkIntUInt<32>(Val, Type); + checkIntUInt<32>(Loc, Val, Type); write32le(Loc, Val); break; case R_AARCH64_ABS64: + case R_AARCH64_GLOB_DAT: case R_AARCH64_PREL64: write64le(Loc, Val); break; case R_AARCH64_ADD_ABS_LO12_NC: - // This relocation stores 12 bits and there's no instruction - // to do it. Instead, we do a 32 bits store of the value - // of r_addend bitwise-or'ed Loc. This assumes that the addend - // bits in Loc are zero. - or32le(Loc, (Val & 0xFFF) << 10); + or32AArch64Imm(Loc, Val); break; case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: - checkInt<33>(Val, Type); - updateAArch64Addr(Loc, Val >> 12); + checkInt<33>(Loc, Val, Type); + write32AArch64Addr(Loc, Val >> 12); break; case R_AARCH64_ADR_PREL_LO21: - checkInt<21>(Val, Type); - updateAArch64Addr(Loc, Val); + checkInt<21>(Loc, Val, Type); + write32AArch64Addr(Loc, Val); break; case R_AARCH64_CALL26: case R_AARCH64_JUMP26: - checkInt<28>(Val, Type); + checkInt<28>(Loc, Val, Type); or32le(Loc, (Val & 0x0FFFFFFC) >> 2); break; case R_AARCH64_CONDBR19: - checkInt<21>(Val, Type); + checkInt<21>(Loc, Val, Type); or32le(Loc, (Val & 0x1FFFFC) << 3); break; case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSDESC_LD64_LO12_NC: - checkAlignment<8>(Val, Type); + checkAlignment<8>(Loc, Val, Type); or32le(Loc, (Val & 0xFF8) << 7); break; - case R_AARCH64_LDST128_ABS_LO12_NC: - or32le(Loc, (Val & 0x0FF8) << 6); + case R_AARCH64_LDST8_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 0, 11)); break; case 
R_AARCH64_LDST16_ABS_LO12_NC: - or32le(Loc, (Val & 0x0FFC) << 9); - break; - case R_AARCH64_LDST8_ABS_LO12_NC: - or32le(Loc, (Val & 0xFFF) << 10); + or32AArch64Imm(Loc, getBits(Val, 1, 11)); break; case R_AARCH64_LDST32_ABS_LO12_NC: - or32le(Loc, (Val & 0xFFC) << 8); + or32AArch64Imm(Loc, getBits(Val, 2, 11)); break; case R_AARCH64_LDST64_ABS_LO12_NC: - or32le(Loc, (Val & 0xFF8) << 7); + or32AArch64Imm(Loc, getBits(Val, 3, 11)); + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + or32AArch64Imm(Loc, getBits(Val, 4, 11)); + break; + case R_AARCH64_MOVW_UABS_G0_NC: + or32le(Loc, (Val & 0xFFFF) << 5); + break; + case R_AARCH64_MOVW_UABS_G1_NC: + or32le(Loc, (Val & 0xFFFF0000) >> 11); + break; + case R_AARCH64_MOVW_UABS_G2_NC: + or32le(Loc, (Val & 0xFFFF00000000) >> 27); + break; + case R_AARCH64_MOVW_UABS_G3: + or32le(Loc, (Val & 0xFFFF000000000000) >> 43); break; case R_AARCH64_TSTBR14: - checkInt<16>(Val, Type); + checkInt<16>(Loc, Val, Type); or32le(Loc, (Val & 0xFFFC) << 3); break; case R_AARCH64_TLSLE_ADD_TPREL_HI12: - checkInt<24>(Val, Type); - updateAArch64Add(Loc, Val >> 12); + checkInt<24>(Loc, Val, Type); + or32AArch64Imm(Loc, Val >> 12); break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: case R_AARCH64_TLSDESC_ADD_LO12_NC: - updateAArch64Add(Loc, Val); + or32AArch64Imm(Loc, Val); break; default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } @@ -1382,7 +1485,7 @@ void AArch64TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, // movk x0, #0x10 // nop // nop - checkUInt<32>(Val, Type); + checkUInt<32>(Loc, Val, Type); switch (Type) { case R_AARCH64_TLSDESC_ADD_LO12_NC: @@ -1434,7 +1537,7 @@ void AArch64TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, void AArch64TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { - checkUInt<32>(Val, Type); + checkUInt<32>(Loc, Val, Type); if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { // Generate MOVZ. 
@@ -1452,6 +1555,7 @@ void AArch64TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, } AMDGPUTargetInfo::AMDGPUTargetInfo() { + RelativeRel = R_AMDGPU_REL64; GotRel = R_AMDGPU_ABS64; GotEntrySize = 8; } @@ -1459,20 +1563,37 @@ AMDGPUTargetInfo::AMDGPUTargetInfo() { void AMDGPUTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { switch (Type) { + case R_AMDGPU_ABS32: case R_AMDGPU_GOTPCREL: + case R_AMDGPU_GOTPCREL32_LO: case R_AMDGPU_REL32: + case R_AMDGPU_REL32_LO: write32le(Loc, Val); break; + case R_AMDGPU_ABS64: + write64le(Loc, Val); + break; + case R_AMDGPU_GOTPCREL32_HI: + case R_AMDGPU_REL32_HI: + write32le(Loc, Val >> 32); + break; default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } RelExpr AMDGPUTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { switch (Type) { + case R_AMDGPU_ABS32: + case R_AMDGPU_ABS64: + return R_ABS; case R_AMDGPU_REL32: + case R_AMDGPU_REL32_LO: + case R_AMDGPU_REL32_HI: return R_PC; case R_AMDGPU_GOTPCREL: + case R_AMDGPU_GOTPCREL32_LO: + case R_AMDGPU_GOTPCREL32_HI: return R_GOT_PC; default: fatal("do not know how to handle relocation " + Twine(Type)); @@ -1492,6 +1613,9 @@ ARMTargetInfo::ARMTargetInfo() { GotPltEntrySize = 4; PltEntrySize = 16; PltHeaderSize = 20; + // ARM uses Variant 1 TLS + TcbSize = 8; + NeedsThunks = true; } RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { @@ -1504,6 +1628,7 @@ RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: + case R_ARM_PREL31: case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: case R_ARM_THM_CALL: @@ -1515,8 +1640,21 @@ RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { // GOT(S) + A - GOT_ORG return R_GOT_OFF; case R_ARM_GOT_PREL: - // GOT(S) + - GOT_ORG + case R_ARM_TLS_IE32: + // GOT(S) + A - P + return R_GOT_PC; + case R_ARM_TARGET1: + 
return Config->Target1Rel ? R_PC : R_ABS; + case R_ARM_TARGET2: + if (Config->Target2 == Target2Policy::Rel) + return R_PC; + if (Config->Target2 == Target2Policy::Abs) + return R_ABS; return R_GOT_PC; + case R_ARM_TLS_GD32: + return R_TLSGD_PC; + case R_ARM_TLS_LDM32: + return R_TLSLD_PC; case R_ARM_BASE_PREL: // B(S) + A - P // FIXME: currently B(S) assumed to be .got, this may not hold for all @@ -1524,24 +1662,38 @@ RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { return R_GOTONLY_PC; case R_ARM_MOVW_PREL_NC: case R_ARM_MOVT_PREL: - case R_ARM_PREL31: case R_ARM_REL32: case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: return R_PC; + case R_ARM_NONE: + return R_HINT; + case R_ARM_TLS_LE32: + return R_TLS; } } +bool ARMTargetInfo::isPicRel(uint32_t Type) const { + return (Type == R_ARM_TARGET1 && !Config->Target1Rel) || + (Type == R_ARM_ABS32); +} + uint32_t ARMTargetInfo::getDynRel(uint32_t Type) const { + if (Type == R_ARM_TARGET1 && !Config->Target1Rel) + return R_ARM_ABS32; if (Type == R_ARM_ABS32) return Type; // Keep it going with a dummy value so that we can find more reloc errors. - errorDynRel(Type); return R_ARM_ABS32; } void ARMTargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { - write32le(Buf, Out<ELF32LE>::Plt->getVA()); + write32le(Buf, In<ELF32LE>::Plt->getVA()); +} + +void ARMTargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { + // An ARM entry is the address of the ifunc resolver function. 
+ write32le(Buf, S.getVA<ELF32LE>()); } void ARMTargetInfo::writePltHeader(uint8_t *Buf) const { @@ -1553,8 +1705,8 @@ void ARMTargetInfo::writePltHeader(uint8_t *Buf) const { 0x00, 0x00, 0x00, 0x00, // L2: .word &(.got.plt) - L1 - 8 }; memcpy(Buf, PltData, sizeof(PltData)); - uint64_t GotPlt = Out<ELF32LE>::GotPlt->getVA(); - uint64_t L1 = Out<ELF32LE>::Plt->getVA() + 8; + uint64_t GotPlt = In<ELF32LE>::GotPlt->getVA(); + uint64_t L1 = In<ELF32LE>::Plt->getVA() + 8; write32le(Buf + 16, GotPlt - L1 - 8); } @@ -1578,6 +1730,9 @@ void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, const SymbolBody &S) const { + // If S is an undefined weak symbol we don't need a Thunk + if (S.isUndefined()) + return Expr; // A state change from ARM to Thumb and vice versa must go through an // interworking thunk if the relocation type is not R_ARM_CALL or // R_ARM_THM_CALL. @@ -1606,18 +1761,29 @@ RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { switch (Type) { - case R_ARM_NONE: - break; case R_ARM_ABS32: case R_ARM_BASE_PREL: + case R_ARM_GLOB_DAT: case R_ARM_GOTOFF32: case R_ARM_GOT_BREL: case R_ARM_GOT_PREL: case R_ARM_REL32: + case R_ARM_RELATIVE: + case R_ARM_TARGET1: + case R_ARM_TARGET2: + case R_ARM_TLS_GD32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_LE32: + case R_ARM_TLS_TPOFF32: write32le(Loc, Val); break; + case R_ARM_TLS_DTPMOD32: + write32le(Loc, 1); + break; case R_ARM_PREL31: - checkInt<31>(Val, Type); + checkInt<31>(Loc, Val, Type); write32le(Loc, (read32le(Loc) & 0x80000000) | (Val & ~0x80000000)); break; case R_ARM_CALL: @@ -1626,7 +1792,7 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, if (Val & 1) { // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. 
// The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' - checkInt<26>(Val, Type); + checkInt<26>(Loc, Val, Type); write32le(Loc, 0xfa000000 | // opcode ((Val & 2) << 23) | // H ((Val >> 2) & 0x00ffffff)); // imm24 @@ -1636,20 +1802,20 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, // BLX (always unconditional) instruction to an ARM Target, select an // unconditional BL. write32le(Loc, 0xeb000000 | (read32le(Loc) & 0x00ffffff)); - // fall through as BL encoding is shared with B + // fall through as BL encoding is shared with B case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: - checkInt<26>(Val, Type); + checkInt<26>(Loc, Val, Type); write32le(Loc, (read32le(Loc) & ~0x00ffffff) | ((Val >> 2) & 0x00ffffff)); break; case R_ARM_THM_JUMP11: - checkInt<12>(Val, Type); + checkInt<12>(Loc, Val, Type); write16le(Loc, (read32le(Loc) & 0xf800) | ((Val >> 1) & 0x07ff)); break; case R_ARM_THM_JUMP19: // Encoding T3: Val = S:J2:J1:imm6:imm11:0 - checkInt<21>(Val, Type); + checkInt<21>(Loc, Val, Type); write16le(Loc, (read16le(Loc) & 0xfbc0) | // opcode cond ((Val >> 10) & 0x0400) | // S @@ -1670,11 +1836,11 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, } // Bit 12 is 0 for BLX, 1 for BL write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12); - // Fall through as rest of encoding is the same as B.W + // Fall through as rest of encoding is the same as B.W case R_ARM_THM_JUMP24: // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 // FIXME: Use of I1 and I2 require v6T2ops - checkInt<25>(Val, Type); + checkInt<25>(Loc, Val, Type); write16le(Loc, 0xf000 | // opcode ((Val >> 14) & 0x0400) | // S @@ -1692,14 +1858,14 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, break; case R_ARM_MOVT_ABS: case R_ARM_MOVT_PREL: - checkInt<32>(Val, Type); + checkInt<32>(Loc, Val, Type); write32le(Loc, (read32le(Loc) & ~0x000f0fff) | (((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff)); break; case R_ARM_THM_MOVT_ABS: 
case R_ARM_THM_MOVT_PREL: // Encoding T1: A = imm4:i:imm3:imm8 - checkInt<32>(Val, Type); + checkInt<32>(Loc, Val, Type); write16le(Loc, 0xf2c0 | // opcode ((Val >> 17) & 0x0400) | // i @@ -1722,7 +1888,7 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, (Val & 0x00ff)); // imm8 break; default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } @@ -1737,6 +1903,13 @@ uint64_t ARMTargetInfo::getImplicitAddend(const uint8_t *Buf, case R_ARM_GOT_BREL: case R_ARM_GOT_PREL: case R_ARM_REL32: + case R_ARM_TARGET1: + case R_ARM_TARGET2: + case R_ARM_TLS_GD32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LE32: return SignExtend64<32>(read32le(Buf)); case R_ARM_PREL31: return SignExtend64<31>(read32le(Buf)); @@ -1794,15 +1967,28 @@ uint64_t ARMTargetInfo::getImplicitAddend(const uint8_t *Buf, } } +bool ARMTargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { + return Type == R_ARM_TLS_LDO32 || Type == R_ARM_TLS_LDM32; +} + +bool ARMTargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { + return Type == R_ARM_TLS_GD32; +} + +bool ARMTargetInfo::isTlsInitialExecRel(uint32_t Type) const { + return Type == R_ARM_TLS_IE32; +} + template <class ELFT> MipsTargetInfo<ELFT>::MipsTargetInfo() { GotPltHeaderEntriesNum = 2; - PageSize = 65536; + DefaultMaxPageSize = 65536; GotEntrySize = sizeof(typename ELFT::uint); GotPltEntrySize = sizeof(typename ELFT::uint); PltEntrySize = 16; PltHeaderSize = 32; CopyRel = R_MIPS_COPY; PltRel = R_MIPS_JUMP_SLOT; + NeedsThunks = true; if (ELFT::Is64Bits) { RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32; TlsGotRel = R_MIPS_TLS_TPREL64; @@ -1819,8 +2005,8 @@ template <class ELFT> MipsTargetInfo<ELFT>::MipsTargetInfo() { template <class ELFT> RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, const SymbolBody &S) const { - if (ELFT::Is64Bits) - // See comment in the calculateMips64RelChain. 
+ // See comment in the calculateMipsRelChain. + if (ELFT::Is64Bits || Config->MipsN32Abi) Type &= 0xff; switch (Type) { default: @@ -1829,16 +2015,16 @@ RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, return R_HINT; case R_MIPS_GPREL16: case R_MIPS_GPREL32: - return R_GOTREL; + return R_MIPS_GOTREL; case R_MIPS_26: return R_PLT; case R_MIPS_HI16: case R_MIPS_LO16: case R_MIPS_GOT_OFST: - // MIPS _gp_disp designates offset between start of function and 'gp' - // pointer into GOT. __gnu_local_gp is equal to the current value of - // the 'gp'. Therefore any relocations against them do not require - // dynamic relocation. + // R_MIPS_HI16/R_MIPS_LO16 relocations against _gp_disp calculate + // offset between start of function and 'gp' value which by default + // equal to the start of .got section. In that case we consider these + // relocations as relative. if (&S == ElfSym<ELFT>::MipsGpDisp) return R_PC; return R_ABS; @@ -1858,6 +2044,11 @@ RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, case R_MIPS_GOT_DISP: case R_MIPS_TLS_GOTTPREL: return R_MIPS_GOT_OFF; + case R_MIPS_CALL_HI16: + case R_MIPS_CALL_LO16: + case R_MIPS_GOT_HI16: + case R_MIPS_GOT_LO16: + return R_MIPS_GOT_OFF32; case R_MIPS_GOT_PAGE: return R_MIPS_GOT_LOCAL_PAGE; case R_MIPS_TLS_GD: @@ -1867,13 +2058,13 @@ RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, } } +template <class ELFT> bool MipsTargetInfo<ELFT>::isPicRel(uint32_t Type) const { + return Type == R_MIPS_32 || Type == R_MIPS_64; +} + template <class ELFT> uint32_t MipsTargetInfo<ELFT>::getDynRel(uint32_t Type) const { - if (Type == R_MIPS_32 || Type == R_MIPS_64) - return RelativeRel; - // Keep it going with a dummy value so that we can find more reloc errors. 
- errorDynRel(Type); - return R_MIPS_32; + return RelativeRel; } template <class ELFT> @@ -1888,11 +2079,9 @@ bool MipsTargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { template <class ELFT> void MipsTargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { - write32<ELFT::TargetEndianness>(Buf, Out<ELFT>::Plt->getVA()); + write32<ELFT::TargetEndianness>(Buf, In<ELFT>::Plt->getVA()); } -static uint16_t mipsHigh(uint64_t V) { return (V + 0x8000) >> 16; } - template <endianness E, uint8_t BSIZE, uint8_t SHIFT> static int64_t getPcRelocAddend(const uint8_t *Loc) { uint32_t Instr = read32<E>(Loc); @@ -1905,35 +2094,59 @@ static void applyMipsPcReloc(uint8_t *Loc, uint32_t Type, uint64_t V) { uint32_t Mask = 0xffffffff >> (32 - BSIZE); uint32_t Instr = read32<E>(Loc); if (SHIFT > 0) - checkAlignment<(1 << SHIFT)>(V, Type); - checkInt<BSIZE + SHIFT>(V, Type); + checkAlignment<(1 << SHIFT)>(Loc, V, Type); + checkInt<BSIZE + SHIFT>(Loc, V, Type); write32<E>(Loc, (Instr & ~Mask) | ((V >> SHIFT) & Mask)); } -template <endianness E> -static void writeMipsHi16(uint8_t *Loc, uint64_t V) { +template <endianness E> static void writeMipsHi16(uint8_t *Loc, uint64_t V) { uint32_t Instr = read32<E>(Loc); - write32<E>(Loc, (Instr & 0xffff0000) | mipsHigh(V)); + uint16_t Res = ((V + 0x8000) >> 16) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); } -template <endianness E> -static void writeMipsLo16(uint8_t *Loc, uint64_t V) { +template <endianness E> static void writeMipsHigher(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x80008000) >> 32) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsHighest(uint8_t *Loc, uint64_t V) { + uint32_t Instr = read32<E>(Loc); + uint16_t Res = ((V + 0x800080008000) >> 48) & 0xffff; + write32<E>(Loc, (Instr & 0xffff0000) | Res); +} + +template <endianness E> static void writeMipsLo16(uint8_t *Loc, uint64_t V) { uint32_t 
Instr = read32<E>(Loc); write32<E>(Loc, (Instr & 0xffff0000) | (V & 0xffff)); } +template <class ELFT> static bool isMipsR6() { + const auto &FirstObj = cast<ELFFileBase<ELFT>>(*Config->FirstElf); + uint32_t Arch = FirstObj.getObj().getHeader()->e_flags & EF_MIPS_ARCH; + return Arch == EF_MIPS_ARCH_32R6 || Arch == EF_MIPS_ARCH_64R6; +} + template <class ELFT> void MipsTargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { const endianness E = ELFT::TargetEndianness; - write32<E>(Buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0]) - write32<E>(Buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28) - write32<E>(Buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) - write32<E>(Buf + 12, 0x031cc023); // subu $24, $24, $28 + if (Config->MipsN32Abi) { + write32<E>(Buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) + write32<E>(Buf + 4, 0x8dd90000); // lw $25, %lo(&GOTPLT[0])($14) + write32<E>(Buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) + write32<E>(Buf + 12, 0x030ec023); // subu $24, $24, $14 + } else { + write32<E>(Buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0]) + write32<E>(Buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28) + write32<E>(Buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) + write32<E>(Buf + 12, 0x031cc023); // subu $24, $24, $28 + } write32<E>(Buf + 16, 0x03e07825); // move $15, $31 write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2 write32<E>(Buf + 24, 0x0320f809); // jalr $25 write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2 - uint64_t Got = Out<ELFT>::GotPlt->getVA(); + uint64_t Got = In<ELFT>::GotPlt->getVA(); writeMipsHi16<E>(Buf, Got); writeMipsLo16<E>(Buf + 4, Got); writeMipsLo16<E>(Buf + 8, Got); @@ -1944,9 +2157,10 @@ void MipsTargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const endianness E = ELFT::TargetEndianness; - write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) - write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt 
entry)($15) - write32<E>(Buf + 8, 0x03200008); // jr $25 + write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) + write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15) + // jr $25 + write32<E>(Buf + 8, isMipsR6<ELFT>() ? 0x03200009 : 0x03200008); write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry) writeMipsHi16<E>(Buf, GotEntryAddr); writeMipsLo16<E>(Buf + 4, GotEntryAddr); @@ -1971,14 +2185,9 @@ RelExpr MipsTargetInfo<ELFT>::getThunkExpr(RelExpr Expr, uint32_t Type, if (F->getObj().getHeader()->e_flags & EF_MIPS_PIC) return Expr; auto *D = dyn_cast<DefinedRegular<ELFT>>(&S); - if (!D || !D->Section) - return Expr; // LA25 is required if target file has PIC code // or target symbol is a PIC symbol. - const ELFFile<ELFT> &DefFile = D->Section->getFile()->getObj(); - bool PicFile = DefFile.getHeader()->e_flags & EF_MIPS_PIC; - bool PicSym = (D->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC; - return (PicFile || PicSym) ? R_THUNK_ABS : Expr; + return D && D->isMipsPIC() ? 
R_THUNK_ABS : Expr; } template <class ELFT> @@ -1990,12 +2199,14 @@ uint64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, return 0; case R_MIPS_32: case R_MIPS_GPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: return read32<E>(Buf); case R_MIPS_26: // FIXME (simon): If the relocation target symbol is not a PLT entry // we should use another expression for calculation: // ((A << 2) | (P & 0xf0000000)) >> 2 - return SignExtend64<28>(read32<E>(Buf) << 2); + return SignExtend64<28>((read32<E>(Buf) & 0x3ffffff) << 2); case R_MIPS_GPREL16: case R_MIPS_LO16: case R_MIPS_PCLO16: @@ -2017,8 +2228,8 @@ uint64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, } } -static std::pair<uint32_t, uint64_t> calculateMips64RelChain(uint32_t Type, - uint64_t Val) { +static std::pair<uint32_t, uint64_t> +calculateMipsRelChain(uint8_t *Loc, uint32_t Type, uint64_t Val) { // MIPS N64 ABI packs multiple relocations into the single relocation // record. In general, all up to three relocations can have arbitrary // types. In fact, Clang and GCC uses only a few combinations. For now, @@ -2039,7 +2250,8 @@ static std::pair<uint32_t, uint64_t> calculateMips64RelChain(uint32_t Type, return std::make_pair(Type2, Val); if (Type2 == R_MIPS_SUB && (Type3 == R_MIPS_HI16 || Type3 == R_MIPS_LO16)) return std::make_pair(Type3, -Val); - error("unsupported relocations combination " + Twine(Type)); + error(getErrorLocation(Loc) + "unsupported relocations combination " + + Twine(Type)); return std::make_pair(Type & 0xff, Val); } @@ -2049,22 +2261,28 @@ void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, const endianness E = ELFT::TargetEndianness; // Thread pointer and DRP offsets from the start of TLS data area. 
// https://www.linux-mips.org/wiki/NPTL - if (Type == R_MIPS_TLS_DTPREL_HI16 || Type == R_MIPS_TLS_DTPREL_LO16) + if (Type == R_MIPS_TLS_DTPREL_HI16 || Type == R_MIPS_TLS_DTPREL_LO16 || + Type == R_MIPS_TLS_DTPREL32 || Type == R_MIPS_TLS_DTPREL64) Val -= 0x8000; - else if (Type == R_MIPS_TLS_TPREL_HI16 || Type == R_MIPS_TLS_TPREL_LO16) + else if (Type == R_MIPS_TLS_TPREL_HI16 || Type == R_MIPS_TLS_TPREL_LO16 || + Type == R_MIPS_TLS_TPREL32 || Type == R_MIPS_TLS_TPREL64) Val -= 0x7000; - if (ELFT::Is64Bits) - std::tie(Type, Val) = calculateMips64RelChain(Type, Val); + if (ELFT::Is64Bits || Config->MipsN32Abi) + std::tie(Type, Val) = calculateMipsRelChain(Loc, Type, Val); switch (Type) { case R_MIPS_32: case R_MIPS_GPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: write32<E>(Loc, Val); break; case R_MIPS_64: + case R_MIPS_TLS_DTPREL64: + case R_MIPS_TLS_TPREL64: write64<E>(Loc, Val); break; case R_MIPS_26: - write32<E>(Loc, (read32<E>(Loc) & ~0x3ffffff) | (Val >> 2)); + write32<E>(Loc, (read32<E>(Loc) & ~0x3ffffff) | ((Val >> 2) & 0x3ffffff)); break; case R_MIPS_GOT_DISP: case R_MIPS_GOT_PAGE: @@ -2072,9 +2290,11 @@ void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, case R_MIPS_GPREL16: case R_MIPS_TLS_GD: case R_MIPS_TLS_LDM: - checkInt<16>(Val, Type); + checkInt<16>(Loc, Val, Type); // fallthrough case R_MIPS_CALL16: + case R_MIPS_CALL_LO16: + case R_MIPS_GOT_LO16: case R_MIPS_GOT_OFST: case R_MIPS_LO16: case R_MIPS_PCLO16: @@ -2083,12 +2303,20 @@ void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, case R_MIPS_TLS_TPREL_LO16: writeMipsLo16<E>(Loc, Val); break; + case R_MIPS_CALL_HI16: + case R_MIPS_GOT_HI16: case R_MIPS_HI16: case R_MIPS_PCHI16: case R_MIPS_TLS_DTPREL_HI16: case R_MIPS_TLS_TPREL_HI16: writeMipsHi16<E>(Loc, Val); break; + case R_MIPS_HIGHER: + writeMipsHigher<E>(Loc, Val); + break; + case R_MIPS_HIGHEST: + writeMipsHighest<E>(Loc, Val); + break; case R_MIPS_JALR: // Ignore this optimization relocation 
for now break; @@ -2108,7 +2336,7 @@ void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, applyMipsPcReloc<E, 32, 0>(Loc, Type, Val); break; default: - fatal("unrecognized reloc " + Twine(Type)); + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } diff --git a/contrib/llvm/tools/lld/ELF/Target.h b/contrib/llvm/tools/lld/ELF/Target.h index d335c1e051b7..156a2c023230 100644 --- a/contrib/llvm/tools/lld/ELF/Target.h +++ b/contrib/llvm/tools/lld/ELF/Target.h @@ -26,9 +26,11 @@ public: virtual bool isTlsInitialExecRel(uint32_t Type) const; virtual bool isTlsLocalDynamicRel(uint32_t Type) const; virtual bool isTlsGlobalDynamicRel(uint32_t Type) const; + virtual bool isPicRel(uint32_t Type) const { return true; } virtual uint32_t getDynRel(uint32_t Type) const { return Type; } virtual void writeGotPltHeader(uint8_t *Buf) const {} virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {}; + virtual void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const; virtual uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const; // If lazy binding is supported, the first entry of the PLT has code @@ -62,13 +64,12 @@ public: unsigned TlsGdRelaxSkip = 1; unsigned PageSize = 4096; + unsigned DefaultMaxPageSize = 4096; - // On freebsd x86_64 the first page cannot be mmaped. - // On linux that is controled by vm.mmap_min_addr. At least on some x86_64 + // On FreeBSD x86_64 the first page cannot be mmaped. + // On Linux that is controled by vm.mmap_min_addr. At least on some x86_64 // installs that is 65536, so the first 15 pages cannot be used. // Given that, the smallest value that can be used in here is 0x10000. - // If using 2MB pages, the smallest page aligned address that works is - // 0x200000, but it looks like every OS uses 4k pages for executables. 
uint64_t DefaultImageBase = 0x10000; uint32_t CopyRel; @@ -80,8 +81,8 @@ public: uint32_t TlsGotRel; uint32_t TlsModuleIndexRel; uint32_t TlsOffsetRel; - unsigned GotEntrySize; - unsigned GotPltEntrySize; + unsigned GotEntrySize = 0; + unsigned GotPltEntrySize = 0; unsigned PltEntrySize; unsigned PltHeaderSize; @@ -92,6 +93,8 @@ public: // Set to 0 for variant 2 unsigned TcbSize = 0; + bool NeedsThunks = false; + virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; @@ -101,10 +104,9 @@ public: virtual void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; }; -StringRef getRelName(uint32_t Type); +std::string toString(uint32_t RelType); uint64_t getPPC64TocBase(); - -const unsigned MipsGPOffset = 0x7ff0; +uint64_t getAArch64Page(uint64_t Expr); extern TargetInfo *Target; TargetInfo *createTarget(); diff --git a/contrib/llvm/tools/lld/ELF/Threads.h b/contrib/llvm/tools/lld/ELF/Threads.h new file mode 100644 index 000000000000..c03e15253e15 --- /dev/null +++ b/contrib/llvm/tools/lld/ELF/Threads.h @@ -0,0 +1,90 @@ +//===- Threads.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLD supports threads to distribute workloads to multiple cores. Using +// multicore is most effective when more than one core are idle. At the +// last step of a build, it is often the case that a linker is the only +// active process on a computer. So, we are naturally interested in using +// threads wisely to reduce latency to deliver results to users. +// +// That said, we don't want to do "too clever" things using threads. 
+// Complex multi-threaded algorithms are sometimes extremely hard to +// justify the correctness and can easily mess up the entire design. +// +// Fortunately, when a linker links large programs (when the link time is +// most critical), it spends most of the time to work on massive number of +// small pieces of data of the same kind, and there are opportunities for +// large parallelism there. Here are examples: +// +// - We have hundreds of thousands of input sections that need to be +// copied to a result file at the last step of link. Once we fix a file +// layout, each section can be copied to its destination and its +// relocations can be applied independently. +// +// - We have tens of millions of small strings when constructing a +// mergeable string section. +// +// For the cases such as the former, we can just use parallel_for_each +// instead of std::for_each (or a plain for loop). Because tasks are +// completely independent from each other, we can run them in parallel +// without any coordination between them. That's very easy to understand +// and justify. +// +// For the cases such as the latter, we can use parallel algorithms to +// deal with massive data. We have to write code for a tailored algorithm +// for each problem, but the complexity of multi-threading is isolated in +// a single pass and doesn't affect the linker's overall design. +// +// The above approach seems to be working fairly well. As an example, when +// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to +// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my +// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from +// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the +// speedup is not linear, but as you add more cores, it gets faster. +// +// On a final note, if you are trying to optimize, keep the axiom "don't +// guess, measure!" in mind. Some important passes of the linker are not +// that slow. 
For example, resolving all symbols is not a very heavy pass, +// although it would be very hard to parallelize it. You want to first +// identify a slow pass and then optimize it. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_THREADS_H +#define LLD_ELF_THREADS_H + +#include "Config.h" + +#include "lld/Core/Parallel.h" +#include <algorithm> +#include <functional> + +namespace lld { +namespace elf { + +template <class IterTy, class FuncTy> +void forEach(IterTy Begin, IterTy End, FuncTy Fn) { + if (Config->Threads) + parallel_for_each(Begin, End, Fn); + else + std::for_each(Begin, End, Fn); +} + +inline void forLoop(size_t Begin, size_t End, std::function<void(size_t)> Fn) { + if (Config->Threads) { + parallel_for(Begin, End, Fn); + } else { + for (size_t I = Begin; I < End; ++I) + Fn(I); + } +} +} +} + +#endif diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp index 1ebbb17f3032..34b630ac2510 100644 --- a/contrib/llvm/tools/lld/ELF/Thunks.cpp +++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp @@ -22,17 +22,20 @@ //===---------------------------------------------------------------------===// #include "Thunks.h" +#include "Config.h" #include "Error.h" -#include "InputFiles.h" #include "InputSection.h" +#include "Memory.h" #include "OutputSections.h" #include "Symbols.h" #include "Target.h" -#include "llvm/Support/Allocator.h" - -#include "llvm/Object/ELF.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cstdint> +#include <cstring> using namespace llvm; using namespace llvm::object; @@ -43,6 +46,7 @@ namespace lld { namespace elf { namespace { + // Specific ARM Thunk implementations. 
The naming convention is: // Source State, TargetState, Target Requirement, ABS or PI, Range template <class ELFT> @@ -96,11 +100,13 @@ public: uint32_t size() const override { return 16; } void writeTo(uint8_t *Buf) const override; }; -} // anonymous namespace + +} // end anonymous namespace // ARM Target Thunks template <class ELFT> static uint64_t getARMThunkDestVA(const SymbolBody &S) { - return S.isInPlt() ? S.getPltVA<ELFT>() : S.getVA<ELFT>(); + uint64_t V = S.isInPlt() ? S.getPltVA<ELFT>() : S.getVA<ELFT>(); + return SignExtend64<32>(V); } template <class ELFT> @@ -177,10 +183,10 @@ Thunk<ELFT>::Thunk(const SymbolBody &D, const InputSection<ELFT> &O) : Destination(D), Owner(O), Offset(O.getThunkOff() + O.getThunksSize()) {} template <class ELFT> typename ELFT::uint Thunk<ELFT>::getVA() const { - return Owner.OutSec->getVA() + Owner.OutSecOff + Offset; + return Owner.OutSec->Addr + Owner.OutSecOff + Offset; } -template <class ELFT> Thunk<ELFT>::~Thunk() {} +template <class ELFT> Thunk<ELFT>::~Thunk() = default; // Creates a thunk for Thumb-ARM interworking. template <class ELFT> @@ -189,19 +195,18 @@ static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S, // ARM relocations need ARM to Thumb interworking Thunks. // Thumb relocations need Thumb to ARM relocations. // Use position independent Thunks if we require position independent code. 
- BumpPtrAllocator &Alloc = IS.getFile()->Alloc; switch (Reloc) { case R_ARM_PC24: case R_ARM_PLT32: case R_ARM_JUMP24: if (Config->Pic) - return new (Alloc) ARMToThumbV7PILongThunk<ELFT>(S, IS); - return new (Alloc) ARMToThumbV7ABSLongThunk<ELFT>(S, IS); + return new (BAlloc) ARMToThumbV7PILongThunk<ELFT>(S, IS); + return new (BAlloc) ARMToThumbV7ABSLongThunk<ELFT>(S, IS); case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: if (Config->Pic) - return new (Alloc) ThumbToARMV7PILongThunk<ELFT>(S, IS); - return new (Alloc) ThumbToARMV7ABSLongThunk<ELFT>(S, IS); + return new (BAlloc) ThumbToARMV7PILongThunk<ELFT>(S, IS); + return new (BAlloc) ThumbToARMV7ABSLongThunk<ELFT>(S, IS); } fatal("unrecognized relocation type"); } @@ -235,7 +240,7 @@ static void addThunkMips(uint32_t RelocType, SymbolBody &S, // Mips Thunks are added to the InputSection defining S. auto *R = cast<DefinedRegular<ELFT>>(&S); auto *Sec = cast<InputSection<ELFT>>(R->Section); - auto *T = new (IS.getFile()->Alloc) MipsThunk<ELFT>(S, *Sec); + auto *T = new (BAlloc) MipsThunk<ELFT>(S, *Sec); Sec->addThunk(T); R->ThunkData = T; } @@ -264,5 +269,5 @@ template class Thunk<ELF32BE>; template class Thunk<ELF64LE>; template class Thunk<ELF64BE>; -} // namespace elf -} // namespace lld +} // end namespace elf +} // end namespace lld diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp index 387bec3d8fb2..e056384fbd44 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.cpp +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -10,21 +10,26 @@ #include "Writer.h" #include "Config.h" #include "LinkerScript.h" +#include "Memory.h" #include "OutputSections.h" #include "Relocations.h" #include "Strings.h" #include "SymbolTable.h" +#include "SyntheticSections.h" #include "Target.h" - #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/StringSaver.h" +#include "llvm/Support/FileSystem.h" #include 
"llvm/Support/raw_ostream.h" +#include <climits> +#include <thread> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; @@ -40,32 +45,25 @@ public: typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::SymRange Elf_Sym_Range; typedef typename ELFT::Rela Elf_Rela; - Writer(SymbolTable<ELFT> &S) : Symtab(S) {} void run(); private: - // This describes a program header entry. - // Each contains type, access flags and range of output sections that will be - // placed in it. - struct Phdr { - Phdr(unsigned Type, unsigned Flags) { - H.p_type = Type; - H.p_flags = Flags; - } - Elf_Phdr H = {}; - OutputSectionBase<ELFT> *First = nullptr; - OutputSectionBase<ELFT> *Last = nullptr; - }; - + void createSyntheticSections(); void copyLocalSymbols(); void addReservedSymbols(); + void addInputSec(InputSectionBase<ELFT> *S); void createSections(); + void forEachRelSec(std::function<void(InputSectionBase<ELFT> &)> Fn); + void sortSections(); + void finalizeSections(); void addPredefinedSections(); - bool needsGot(); - void createPhdrs(); + std::vector<PhdrEntry> createPhdrs(); + void removeEmptyPTLoad(); + void addPtArmExid(std::vector<PhdrEntry> &Phdrs); void assignAddresses(); void assignFileOffsets(); + void assignFileOffsetsBinary(); void setPhdrs(); void fixHeaders(); void fixSectionAlignments(); @@ -73,214 +71,310 @@ private: void openFile(); void writeHeader(); void writeSections(); + void writeSectionsBinary(); void writeBuildId(); - bool needsInterpSection() const { - return !Symtab.getSharedFiles().empty() && !Config->DynamicLinker.empty(); - } - bool isOutputDynamic() const { - return !Symtab.getSharedFiles().empty() || Config->Pic; - } - - void addCommonSymbols(std::vector<DefinedCommon *> &Syms); std::unique_ptr<FileOutputBuffer> Buffer; - BumpPtrAllocator Alloc; - std::vector<OutputSectionBase<ELFT> *> OutputSections; - 
std::vector<std::unique_ptr<OutputSectionBase<ELFT>>> OwningSections; + std::vector<OutputSectionBase *> OutputSections; + OutputSectionFactory<ELFT> Factory; void addRelIpltSymbols(); void addStartEndSymbols(); - void addStartStopSymbols(OutputSectionBase<ELFT> *Sec); + void addStartStopSymbols(OutputSectionBase *Sec); + uintX_t getEntryAddr(); + OutputSectionBase *findSection(StringRef Name); - SymbolTable<ELFT> &Symtab; - std::vector<Phdr> Phdrs; + std::vector<PhdrEntry> Phdrs; uintX_t FileSize; uintX_t SectionHeaderOff; + bool AllocateHeader = true; }; } // anonymous namespace -template <class ELFT> -StringRef elf::getOutputSectionName(InputSectionBase<ELFT> *S) { - StringRef Dest = Script<ELFT>::X->getOutputSection(S); - if (!Dest.empty()) - return Dest; - - StringRef Name = S->getSectionName(); - for (StringRef V : {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.", - ".init_array.", ".fini_array.", ".ctors.", ".dtors.", - ".tbss.", ".gcc_except_table.", ".tdata."}) - if (Name.startswith(V)) - return V.drop_back(); +StringRef elf::getOutputSectionName(StringRef Name) { + if (Config->Relocatable) + return Name; + + for (StringRef V : + {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.", + ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", + ".gcc_except_table.", ".tdata.", ".ARM.exidx."}) { + StringRef Prefix = V.drop_back(); + if (Name.startswith(V) || Name == Prefix) + return Prefix; + } + + // CommonSection is identified as "COMMON" in linker scripts. + // By default, it should go to .bss section. + if (Name == "COMMON") + return ".bss"; + + // ".zdebug_" is a prefix for ZLIB-compressed sections. + // Because we decompressed input sections, we want to remove 'z'. 
+ if (Name.startswith(".zdebug_")) + return Saver.save(Twine(".") + Name.substr(2)); return Name; } -template <class ELFT> -void elf::reportDiscarded(InputSectionBase<ELFT> *IS, - const std::unique_ptr<elf::ObjectFile<ELFT>> &File) { - if (!Config->PrintGcSections || !IS || IS->Live) +template <class ELFT> void elf::reportDiscarded(InputSectionBase<ELFT> *IS) { + if (!Config->PrintGcSections) return; - errs() << "removing unused section from '" << IS->getSectionName() - << "' in file '" << File->getName() << "'\n"; + errs() << "removing unused section from '" << IS->Name << "' in file '" + << IS->getFile()->getName() << "'\n"; } -template <class ELFT> void elf::writeResult(SymbolTable<ELFT> *Symtab) { - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Ehdr Elf_Ehdr; +template <class ELFT> static bool needsInterpSection() { + return !Symtab<ELFT>::X->getSharedFiles().empty() && + !Config->DynamicLinker.empty() && + !Script<ELFT>::X->ignoreInterpSection(); +} - // Create singleton output sections. - OutputSection<ELFT> Bss(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE); - DynamicSection<ELFT> Dynamic; - EhOutputSection<ELFT> EhFrame; - GotSection<ELFT> Got; - InterpSection<ELFT> Interp; - PltSection<ELFT> Plt; - RelocationSection<ELFT> RelaDyn(Config->Rela ? ".rela.dyn" : ".rel.dyn", - Config->ZCombreloc); - StringTableSection<ELFT> DynStrTab(".dynstr", true); - StringTableSection<ELFT> ShStrTab(".shstrtab", false); - SymbolTableSection<ELFT> DynSymTab(DynStrTab); - VersionTableSection<ELFT> VerSym; - VersionNeedSection<ELFT> VerNeed; - - OutputSectionBase<ELFT> ElfHeader("", 0, SHF_ALLOC); - ElfHeader.setSize(sizeof(Elf_Ehdr)); - OutputSectionBase<ELFT> ProgramHeaders("", 0, SHF_ALLOC); - ProgramHeaders.updateAlignment(sizeof(uintX_t)); - - // Instantiate optional output sections if they are needed. 
- std::unique_ptr<BuildIdSection<ELFT>> BuildId; - std::unique_ptr<EhFrameHeader<ELFT>> EhFrameHdr; - std::unique_ptr<GnuHashTableSection<ELFT>> GnuHashTab; - std::unique_ptr<GotPltSection<ELFT>> GotPlt; - std::unique_ptr<HashTableSection<ELFT>> HashTab; - std::unique_ptr<RelocationSection<ELFT>> RelaPlt; - std::unique_ptr<StringTableSection<ELFT>> StrTab; - std::unique_ptr<SymbolTableSection<ELFT>> SymTabSec; - std::unique_ptr<OutputSection<ELFT>> MipsRldMap; - std::unique_ptr<VersionDefinitionSection<ELFT>> VerDef; - - if (Config->BuildId == BuildIdKind::Fnv1) - BuildId.reset(new BuildIdFnv1<ELFT>); - else if (Config->BuildId == BuildIdKind::Md5) - BuildId.reset(new BuildIdMd5<ELFT>); - else if (Config->BuildId == BuildIdKind::Sha1) - BuildId.reset(new BuildIdSha1<ELFT>); - else if (Config->BuildId == BuildIdKind::Hexstring) - BuildId.reset(new BuildIdHexstring<ELFT>); - - if (Config->EhFrameHdr) - EhFrameHdr.reset(new EhFrameHeader<ELFT>); - - if (Config->GnuHash) - GnuHashTab.reset(new GnuHashTableSection<ELFT>); - if (Config->SysvHash) - HashTab.reset(new HashTableSection<ELFT>); - StringRef S = Config->Rela ? ".rela.plt" : ".rel.plt"; - GotPlt.reset(new GotPltSection<ELFT>); - RelaPlt.reset(new RelocationSection<ELFT>(S, false /*Sort*/)); - if (!Config->StripAll) { - StrTab.reset(new StringTableSection<ELFT>(".strtab", false)); - SymTabSec.reset(new SymbolTableSection<ELFT>(*StrTab)); - } - if (Config->EMachine == EM_MIPS && !Config->Shared) { - // This is a MIPS specific section to hold a space within the data segment - // of executable file which is pointed to by the DT_MIPS_RLD_MAP entry. 
- // See "Dynamic section" in Chapter 5 in the following document: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - MipsRldMap.reset(new OutputSection<ELFT>(".rld_map", SHT_PROGBITS, - SHF_ALLOC | SHF_WRITE)); - MipsRldMap->setSize(sizeof(uintX_t)); - MipsRldMap->updateAlignment(sizeof(uintX_t)); - } - if (!Config->VersionDefinitions.empty()) - VerDef.reset(new VersionDefinitionSection<ELFT>()); - - Out<ELFT>::Bss = &Bss; - Out<ELFT>::BuildId = BuildId.get(); - Out<ELFT>::DynStrTab = &DynStrTab; - Out<ELFT>::DynSymTab = &DynSymTab; - Out<ELFT>::Dynamic = &Dynamic; - Out<ELFT>::EhFrame = &EhFrame; - Out<ELFT>::EhFrameHdr = EhFrameHdr.get(); - Out<ELFT>::GnuHashTab = GnuHashTab.get(); - Out<ELFT>::Got = &Got; - Out<ELFT>::GotPlt = GotPlt.get(); - Out<ELFT>::HashTab = HashTab.get(); - Out<ELFT>::Interp = &Interp; - Out<ELFT>::Plt = &Plt; - Out<ELFT>::RelaDyn = &RelaDyn; - Out<ELFT>::RelaPlt = RelaPlt.get(); - Out<ELFT>::ShStrTab = &ShStrTab; - Out<ELFT>::StrTab = StrTab.get(); - Out<ELFT>::SymTab = SymTabSec.get(); - Out<ELFT>::VerDef = VerDef.get(); - Out<ELFT>::VerSym = &VerSym; - Out<ELFT>::VerNeed = &VerNeed; - Out<ELFT>::MipsRldMap = MipsRldMap.get(); - Out<ELFT>::Opd = nullptr; - Out<ELFT>::OpdBuf = nullptr; - Out<ELFT>::TlsPhdr = nullptr; - Out<ELFT>::ElfHeader = &ElfHeader; - Out<ELFT>::ProgramHeaders = &ProgramHeaders; - - Writer<ELFT>(*Symtab).run(); +template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); } + +template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() { + auto I = std::remove_if(Phdrs.begin(), Phdrs.end(), [&](const PhdrEntry &P) { + if (P.p_type != PT_LOAD) + return false; + if (!P.First) + return true; + uintX_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; + return Size == 0; + }); + Phdrs.erase(I, Phdrs.end()); } // The main function of the writer. template <class ELFT> void Writer<ELFT>::run() { - if (!Config->DiscardAll) + // Create linker-synthesized sections such as .got or .plt. 
+ // Such sections are of type input section. + createSyntheticSections(); + + // We need to create some reserved symbols such as _end. Create them. + if (!Config->Relocatable) + addReservedSymbols(); + + // Some architectures use small displacements for jump instructions. + // It is linker's responsibility to create thunks containing long + // jump instructions if jump targets are too far. Create thunks. + if (Target->NeedsThunks) + forEachRelSec(createThunks<ELFT>); + + // Create output sections. + Script<ELFT>::X->OutputSections = &OutputSections; + if (ScriptConfig->HasSections) { + // If linker script contains SECTIONS commands, let it create sections. + Script<ELFT>::X->processCommands(Factory); + + // Linker scripts may have left some input sections unassigned. + // Assign such sections using the default rule. + Script<ELFT>::X->addOrphanSections(Factory); + } else { + // If linker script does not contain SECTIONS commands, create + // output sections by default rules. We still need to give the + // linker script a chance to run, because it might contain + // non-SECTIONS commands such as ASSERT. + createSections(); + Script<ELFT>::X->processCommands(Factory); + } + + if (Config->Discard != DiscardPolicy::All) copyLocalSymbols(); - addReservedSymbols(); - createSections(); - if (HasError) + + // Now that we have a complete set of output sections. This function + // completes section contents. For example, we need to add strings + // to the string table, and add entries to .got and .plt. + // finalizeSections does that. 
+ finalizeSections(); + if (ErrorCount) return; if (Config->Relocatable) { assignFileOffsets(); } else { - createPhdrs(); - fixHeaders(); - if (ScriptConfig->DoLayout) { - Script<ELFT>::X->assignAddresses(OutputSections); + if (ScriptConfig->HasSections) { + Script<ELFT>::X->assignAddresses(Phdrs); } else { fixSectionAlignments(); assignAddresses(); } - assignFileOffsets(); + + // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a + // 0 sized region. This has to be done late since only after assignAddresses + // we know the size of the sections. + removeEmptyPTLoad(); + + if (!Config->OFormatBinary) + assignFileOffsets(); + else + assignFileOffsetsBinary(); + setPhdrs(); fixAbsoluteSymbols(); } + // Write the result down to a file. openFile(); - if (HasError) + if (ErrorCount) return; - writeHeader(); - writeSections(); + if (!Config->OFormatBinary) { + writeHeader(); + writeSections(); + } else { + writeSectionsBinary(); + } + + // Backfill .note.gnu.build-id section content. This is done at last + // because the content is usually a hash value of the entire output file. writeBuildId(); - if (HasError) + if (ErrorCount) return; + if (auto EC = Buffer->commit()) error(EC, "failed to write to the output file"); + + // Flush the output streams and exit immediately. A full shutdown + // is a good test that we are keeping track of all allocated memory, + // but actually freeing it is a waste of time in a regular linker run. + if (Config->ExitEarly) + exitLld(0); } -template <class ELFT> -static void reportUndefined(SymbolTable<ELFT> &Symtab, SymbolBody *Sym) { - if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore) - return; +// Initialize Out<ELFT> members. +template <class ELFT> void Writer<ELFT>::createSyntheticSections() { + // Initialize all pointers with NULL. This is needed because + // you can call lld::elf::main more than once as a library. 
+ memset(&Out<ELFT>::First, 0, sizeof(Out<ELFT>)); - if (Config->Shared && Sym->symbol()->Visibility == STV_DEFAULT && - Config->UnresolvedSymbols != UnresolvedPolicy::NoUndef) - return; + // Create singleton output sections. + Out<ELFT>::Bss = + make<OutputSection<ELFT>>(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE); + In<ELFT>::DynStrTab = make<StringTableSection<ELFT>>(".dynstr", true); + In<ELFT>::Dynamic = make<DynamicSection<ELFT>>(); + Out<ELFT>::EhFrame = make<EhOutputSection<ELFT>>(); + In<ELFT>::RelaDyn = make<RelocationSection<ELFT>>( + Config->Rela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); + In<ELFT>::ShStrTab = make<StringTableSection<ELFT>>(".shstrtab", false); + + Out<ELFT>::ElfHeader = make<OutputSectionBase>("", 0, SHF_ALLOC); + Out<ELFT>::ElfHeader->Size = sizeof(Elf_Ehdr); + Out<ELFT>::ProgramHeaders = make<OutputSectionBase>("", 0, SHF_ALLOC); + Out<ELFT>::ProgramHeaders->updateAlignment(sizeof(uintX_t)); + + if (needsInterpSection<ELFT>()) { + In<ELFT>::Interp = createInterpSection<ELFT>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Interp); + } else { + In<ELFT>::Interp = nullptr; + } - std::string Msg = "undefined symbol: " + Sym->getName().str(); - if (Sym->File) - Msg += " in " + getFilename(Sym->File); - if (Config->UnresolvedSymbols == UnresolvedPolicy::Warn) - warning(Msg); - else - error(Msg); + if (!Config->Relocatable) + Symtab<ELFT>::X->Sections.push_back(createCommentSection<ELFT>()); + + if (Config->Strip != StripPolicy::All) { + In<ELFT>::StrTab = make<StringTableSection<ELFT>>(".strtab", false); + In<ELFT>::SymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::StrTab); + } + + if (Config->BuildId != BuildIdKind::None) { + In<ELFT>::BuildId = make<BuildIdSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::BuildId); + } + + InputSection<ELFT> *Common = createCommonSection<ELFT>(); + if (!Common->Data.empty()) { + In<ELFT>::Common = Common; + Symtab<ELFT>::X->Sections.push_back(Common); + } + + // Add 
MIPS-specific sections. + bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() || Config->Pic; + if (Config->EMachine == EM_MIPS) { + if (!Config->Shared && HasDynSymTab) { + In<ELFT>::MipsRldMap = make<MipsRldMapSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsRldMap); + } + if (auto *Sec = MipsAbiFlagsSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + if (auto *Sec = MipsOptionsSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + if (auto *Sec = MipsReginfoSection<ELFT>::create()) + Symtab<ELFT>::X->Sections.push_back(Sec); + } + + if (HasDynSymTab) { + In<ELFT>::DynSymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::DynStrTab); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynSymTab); + + In<ELFT>::VerSym = make<VersionTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerSym); + + if (!Config->VersionDefinitions.empty()) { + In<ELFT>::VerDef = make<VersionDefinitionSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerDef); + } + + In<ELFT>::VerNeed = make<VersionNeedSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerNeed); + + if (Config->GnuHash) { + In<ELFT>::GnuHashTab = make<GnuHashTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GnuHashTab); + } + + if (Config->SysvHash) { + In<ELFT>::HashTab = make<HashTableSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::HashTab); + } + + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Dynamic); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynStrTab); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaDyn); + } + + // Add .got. MIPS' .got is so different from the other archs, + // it has its own class. 
+ if (Config->EMachine == EM_MIPS) { + In<ELFT>::MipsGot = make<MipsGotSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsGot); + } else { + In<ELFT>::Got = make<GotSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Got); + } + + In<ELFT>::GotPlt = make<GotPltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GotPlt); + In<ELFT>::IgotPlt = make<IgotPltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::IgotPlt); + + if (Config->GdbIndex) { + In<ELFT>::GdbIndex = make<GdbIndexSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GdbIndex); + } + + // We always need to add rel[a].plt to output if it has entries. + // Even for static linking it can contain R_[*]_IRELATIVE relocations. + In<ELFT>::RelaPlt = make<RelocationSection<ELFT>>( + Config->Rela ? ".rela.plt" : ".rel.plt", false /*Sort*/); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaPlt); + + // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure + // that the IRelative relocations are processed last by the dynamic loader + In<ELFT>::RelaIplt = make<RelocationSection<ELFT>>( + (Config->EMachine == EM_ARM) ? ".rel.dyn" : In<ELFT>::RelaPlt->Name, + false /*Sort*/); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaIplt); + + In<ELFT>::Plt = make<PltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Plt); + In<ELFT>::Iplt = make<IpltSection<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Iplt); + + if (Config->EhFrameHdr) { + In<ELFT>::EhFrameHdr = make<EhFrameHeader<ELFT>>(); + Symtab<ELFT>::X->Sections.push_back(In<ELFT>::EhFrameHdr); + } } template <class ELFT> @@ -297,7 +391,7 @@ static bool shouldKeepInSymtab(InputSectionBase<ELFT> *Sec, StringRef SymName, if (Sec == &InputSection<ELFT>::Discarded) return false; - if (Config->DiscardNone) + if (Config->Discard == DiscardPolicy::None) return true; // In ELF assembly .L symbols are normally discarded by the assembler. 
@@ -308,16 +402,22 @@ static bool shouldKeepInSymtab(InputSectionBase<ELFT> *Sec, StringRef SymName, if (!SymName.startswith(".L") && !SymName.empty()) return true; - if (Config->DiscardLocals) + if (Config->Discard == DiscardPolicy::Locals) return false; - return !(Sec->getSectionHdr()->sh_flags & SHF_MERGE); + return !Sec || !(Sec->Flags & SHF_MERGE); } template <class ELFT> static bool includeInSymtab(const SymbolBody &B) { if (!B.isLocal() && !B.symbol()->IsUsedInRegularObj) return false; + // If --retain-symbols-file is given, we'll keep only symbols listed in that + // file. + if (Config->Discard == DiscardPolicy::RetainFile && + !Config->RetainSymbolsFile.count(B.getName())) + return false; + if (auto *D = dyn_cast<DefinedRegular<ELFT>>(&B)) { // Always include absolute symbols. if (!D->Section) @@ -335,27 +435,29 @@ template <class ELFT> static bool includeInSymtab(const SymbolBody &B) { // Local symbols are not in the linker's symbol table. This function scans // each object file's symbol table to copy local symbols to the output. template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { - if (!Out<ELFT>::SymTab) + if (!In<ELFT>::SymTab) return; - for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F : - Symtab.getObjectFiles()) { - const char *StrTab = F->getStringTable().data(); + for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { for (SymbolBody *B : F->getLocalSymbols()) { + if (!B->IsLocal) + fatal(toString(F) + + ": broken object: getLocalSymbols returns a non-local symbol"); auto *DR = dyn_cast<DefinedRegular<ELFT>>(B); + // No reason to keep local undefined symbol in symtab. 
if (!DR) continue; if (!includeInSymtab<ELFT>(*B)) continue; - StringRef SymName(StrTab + B->getNameOffset()); + InputSectionBase<ELFT> *Sec = DR->Section; - if (!shouldKeepInSymtab<ELFT>(Sec, SymName, *B)) + if (!shouldKeepInSymtab<ELFT>(Sec, B->getName(), *B)) continue; - ++Out<ELFT>::SymTab->NumLocals; + ++In<ELFT>::SymTab->NumLocals; if (Config->Relocatable) - B->DynsymIndex = Out<ELFT>::SymTab->NumLocals; - F->KeptLocalSyms.push_back( - std::make_pair(DR, Out<ELFT>::SymTab->StrTabSec.addString(SymName))); + B->DynsymIndex = In<ELFT>::SymTab->NumLocals; + F->KeptLocalSyms.push_back(std::make_pair( + DR, In<ELFT>::SymTab->StrTabSec.addString(B->getName()))); } } } @@ -376,65 +478,82 @@ static int getPPC64SectionRank(StringRef SectionName) { .Default(1); } -template <class ELFT> static bool isRelroSection(OutputSectionBase<ELFT> *Sec) { +template <class ELFT> bool elf::isRelroSection(const OutputSectionBase *Sec) { if (!Config->ZRelro) return false; - typename ELFT::uint Flags = Sec->getFlags(); + uint64_t Flags = Sec->Flags; if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) return false; if (Flags & SHF_TLS) return true; - uint32_t Type = Sec->getType(); + uint32_t Type = Sec->Type; if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || Type == SHT_PREINIT_ARRAY) return true; - if (Sec == Out<ELFT>::GotPlt) + if (Sec == In<ELFT>::GotPlt->OutSec) return Config->ZNow; - if (Sec == Out<ELFT>::Dynamic || Sec == Out<ELFT>::Got) + if (Sec == In<ELFT>::Dynamic->OutSec) + return true; + if (In<ELFT>::Got && Sec == In<ELFT>::Got->OutSec) + return true; + if (In<ELFT>::MipsGot && Sec == In<ELFT>::MipsGot->OutSec) return true; StringRef S = Sec->getName(); return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || - S == ".eh_frame"; + S == ".eh_frame" || S == ".openbsd.randomdata"; } -// Output section ordering is determined by this function. 
template <class ELFT> -static bool compareSections(OutputSectionBase<ELFT> *A, - OutputSectionBase<ELFT> *B) { - typedef typename ELFT::uint uintX_t; - - int Comp = Script<ELFT>::X->compareSections(A->getName(), B->getName()); - if (Comp != 0) - return Comp < 0; - - uintX_t AFlags = A->getFlags(); - uintX_t BFlags = B->getFlags(); +static bool compareSectionsNonScript(const OutputSectionBase *A, + const OutputSectionBase *B) { + // Put .interp first because some loaders want to see that section + // on the first page of the executable file when loaded into memory. + bool AIsInterp = A->getName() == ".interp"; + bool BIsInterp = B->getName() == ".interp"; + if (AIsInterp != BIsInterp) + return AIsInterp; // Allocatable sections go first to reduce the total PT_LOAD size and // so debug info doesn't change addresses in actual code. - bool AIsAlloc = AFlags & SHF_ALLOC; - bool BIsAlloc = BFlags & SHF_ALLOC; + bool AIsAlloc = A->Flags & SHF_ALLOC; + bool BIsAlloc = B->Flags & SHF_ALLOC; if (AIsAlloc != BIsAlloc) return AIsAlloc; - // We don't have any special requirements for the relative order of - // two non allocatable sections. + // We don't have any special requirements for the relative order of two non + // allocatable sections. if (!AIsAlloc) return false; + // We want to put section specified by -T option first, so we + // can start assigning VA starting from them later. + auto AAddrSetI = Config->SectionStartMap.find(A->getName()); + auto BAddrSetI = Config->SectionStartMap.find(B->getName()); + bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end(); + bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end(); + if (AHasAddrSet != BHasAddrSet) + return AHasAddrSet; + if (AHasAddrSet) + return AAddrSetI->second < BAddrSetI->second; + // We want the read only sections first so that they go in the PT_LOAD // covering the program headers at the start of the file. 
- bool AIsWritable = AFlags & SHF_WRITE; - bool BIsWritable = BFlags & SHF_WRITE; + bool AIsWritable = A->Flags & SHF_WRITE; + bool BIsWritable = B->Flags & SHF_WRITE; if (AIsWritable != BIsWritable) return BIsWritable; - // For a corresponding reason, put non exec sections first (the program - // header PT_LOAD is not executable). - bool AIsExec = AFlags & SHF_EXECINSTR; - bool BIsExec = BFlags & SHF_EXECINSTR; - if (AIsExec != BIsExec) - return BIsExec; + if (!Config->SingleRoRx) { + // For a corresponding reason, put non exec sections first (the program + // header PT_LOAD is not executable). + // We only do that if we are not using linker scripts, since with linker + // scripts ro and rx sections are in the same PT_LOAD, so their relative + // order is not important. The same applies for -no-rosegment. + bool AIsExec = A->Flags & SHF_EXECINSTR; + bool BIsExec = B->Flags & SHF_EXECINSTR; + if (AIsExec != BIsExec) + return BIsExec; + } // If we got here we know that both A and B are in the same PT_LOAD. @@ -442,8 +561,8 @@ static bool compareSections(OutputSectionBase<ELFT> *A, // PT_LOAD, so stick TLS sections directly before R/W sections. The TLS NOBITS // sections are placed here as they don't take up virtual address space in the // PT_LOAD. - bool AIsTls = AFlags & SHF_TLS; - bool BIsTls = BFlags & SHF_TLS; + bool AIsTls = A->Flags & SHF_TLS; + bool BIsTls = B->Flags & SHF_TLS; if (AIsTls != BIsTls) return AIsTls; @@ -452,14 +571,14 @@ static bool compareSections(OutputSectionBase<ELFT> *A, // them is a p_memsz that is larger than p_filesz. Seeing that it // zeros the end of the PT_LOAD, so that has to correspond to the // nobits sections. - bool AIsNoBits = A->getType() == SHT_NOBITS; - bool BIsNoBits = B->getType() == SHT_NOBITS; + bool AIsNoBits = A->Type == SHT_NOBITS; + bool BIsNoBits = B->Type == SHT_NOBITS; if (AIsNoBits != BIsNoBits) return BIsNoBits; // We place RelRo section before plain r/w ones. 
- bool AIsRelRo = isRelroSection(A); - bool BIsRelRo = isRelroSection(B); + bool AIsRelRo = isRelroSection<ELFT>(A); + bool BIsRelRo = isRelroSection<ELFT>(B); if (AIsRelRo != BIsRelRo) return AIsRelRo; @@ -472,40 +591,70 @@ static bool compareSections(OutputSectionBase<ELFT> *A, return false; } -// Until this function is called, common symbols do not belong to any section. -// This function adds them to end of BSS section. +// Output section ordering is determined by this function. template <class ELFT> -void Writer<ELFT>::addCommonSymbols(std::vector<DefinedCommon *> &Syms) { - if (Syms.empty()) - return; +static bool compareSections(const OutputSectionBase *A, + const OutputSectionBase *B) { + // For now, put sections mentioned in a linker script first. + int AIndex = Script<ELFT>::X->getSectionIndex(A->getName()); + int BIndex = Script<ELFT>::X->getSectionIndex(B->getName()); + bool AInScript = AIndex != INT_MAX; + bool BInScript = BIndex != INT_MAX; + if (AInScript != BInScript) + return AInScript; + // If both are in the script, use that order. + if (AInScript) + return AIndex < BIndex; + + return compareSectionsNonScript<ELFT>(A, B); +} - // Sort the common symbols by alignment as an heuristic to pack them better. 
- std::stable_sort(Syms.begin(), Syms.end(), - [](const DefinedCommon *A, const DefinedCommon *B) { - return A->Alignment > B->Alignment; - }); - - uintX_t Off = Out<ELFT>::Bss->getSize(); - for (DefinedCommon *C : Syms) { - Off = alignTo(Off, C->Alignment); - Out<ELFT>::Bss->updateAlignment(C->Alignment); - C->OffsetInBss = Off; - Off += C->Size; - } +// Program header entry +PhdrEntry::PhdrEntry(unsigned Type, unsigned Flags) { + p_type = Type; + p_flags = Flags; +} + +void PhdrEntry::add(OutputSectionBase *Sec) { + Last = Sec; + if (!First) + First = Sec; + p_align = std::max(p_align, Sec->Addralign); + if (p_type == PT_LOAD) + Sec->FirstInPtLoad = First; +} - Out<ELFT>::Bss->setSize(Off); +template <class ELFT> +static Symbol *addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, + typename ELFT::uint Val, + uint8_t StOther = STV_HIDDEN) { + SymbolBody *S = Symtab<ELFT>::X->find(Name); + if (!S) + return nullptr; + if (!S->isUndefined() && !S->isShared()) + return S->symbol(); + return Symtab<ELFT>::X->addSynthetic(Name, Sec, Val, StOther); +} + +template <class ELFT> +static Symbol *addRegular(StringRef Name, InputSectionBase<ELFT> *Sec, + typename ELFT::uint Value) { + // The linker generated symbols are added as STB_WEAK to allow user defined + // ones to override them. 
+ return Symtab<ELFT>::X->addRegular(Name, STV_HIDDEN, STT_NOTYPE, Value, + /*Size=*/0, STB_WEAK, Sec, + /*File=*/nullptr); } template <class ELFT> -static Symbol *addOptionalSynthetic(SymbolTable<ELFT> &Table, StringRef Name, - OutputSectionBase<ELFT> *Sec, - typename ELFT::uint Val) { - SymbolBody *S = Table.find(Name); +static Symbol *addOptionalRegular(StringRef Name, InputSectionBase<ELFT> *IS, + typename ELFT::uint Value) { + SymbolBody *S = Symtab<ELFT>::X->find(Name); if (!S) return nullptr; if (!S->isUndefined() && !S->isShared()) return S->symbol(); - return Table.addSynthetic(Name, Sec, Val); + return addRegular(Name, IS, Value); } // The beginning and the ending of .rel[a].plt section are marked @@ -515,14 +664,13 @@ static Symbol *addOptionalSynthetic(SymbolTable<ELFT> &Table, StringRef Name, // need these symbols, since IRELATIVE relocs are resolved through GOT // and PLT. For details, see http://www.airs.com/blog/archives/403. template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() { - if (isOutputDynamic() || !Out<ELFT>::RelaPlt) + if (In<ELFT>::DynSymTab) return; StringRef S = Config->Rela ? "__rela_iplt_start" : "__rel_iplt_start"; - addOptionalSynthetic(Symtab, S, Out<ELFT>::RelaPlt, 0); + addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0); S = Config->Rela ? "__rela_iplt_end" : "__rel_iplt_end"; - addOptionalSynthetic(Symtab, S, Out<ELFT>::RelaPlt, - DefinedSynthetic<ELFT>::SectionEnd); + addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, -1); } // The linker is expected to define some symbols depending on @@ -530,24 +678,28 @@ template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() { template <class ELFT> void Writer<ELFT>::addReservedSymbols() { if (Config->EMachine == EM_MIPS) { // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer - // so that it points to an absolute address which is relative to GOT. + // so that it points to an absolute address which by default is relative + // to GOT. Default offset is 0x7ff0. 
// See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - Symtab.addSynthetic("_gp", Out<ELFT>::Got, MipsGPOffset); + ElfSym<ELFT>::MipsGp = + Symtab<ELFT>::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between - // start of function and 'gp' pointer into GOT. - Symbol *Sym = - addOptionalSynthetic(Symtab, "_gp_disp", Out<ELFT>::Got, MipsGPOffset); - if (Sym) - ElfSym<ELFT>::MipsGpDisp = Sym->body(); + // start of function and 'gp' pointer into GOT. To simplify relocation + // calculation we assign _gp value to it and calculate corresponding + // relocations as relative to this value. + if (Symtab<ELFT>::X->find("_gp_disp")) + ElfSym<ELFT>::MipsGpDisp = + Symtab<ELFT>::X->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL); // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' // pointer. This symbol is used in the code generated by .cpload pseudo-op // in case of using -mno-shared option. // https://sourceware.org/ml/binutils/2004-12/msg00094.html - addOptionalSynthetic(Symtab, "__gnu_local_gp", Out<ELFT>::Got, - MipsGPOffset); + if (Symtab<ELFT>::X->find("__gnu_local_gp")) + ElfSym<ELFT>::MipsLocalGp = + Symtab<ELFT>::X->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL); } // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol @@ -562,27 +714,35 @@ template <class ELFT> void Writer<ELFT>::addReservedSymbols() { // an undefined symbol in the .o files. // Given that the symbol is effectively unused, we just create a dummy // hidden one to avoid the undefined symbol error. - if (!Config->Relocatable) - Symtab.addIgnored("_GLOBAL_OFFSET_TABLE_"); + Symtab<ELFT>::X->addIgnored("_GLOBAL_OFFSET_TABLE_"); // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For // static linking the linker is required to optimize away any references to // __tls_get_addr, so it's not defined anywhere. 
Create a hidden definition - // to avoid the undefined symbol error. - if (!isOutputDynamic()) - Symtab.addIgnored("__tls_get_addr"); + // to avoid the undefined symbol error. As usual special cases are ARM and + // MIPS - the libc for these targets defines __tls_get_addr itself because + // there are no TLS optimizations for these targets. + if (!In<ELFT>::DynSymTab && + (Config->EMachine != EM_MIPS && Config->EMachine != EM_ARM)) + Symtab<ELFT>::X->addIgnored("__tls_get_addr"); + + // If linker script do layout we do not need to create any standart symbols. + if (ScriptConfig->HasSections) + return; + + ElfSym<ELFT>::EhdrStart = Symtab<ELFT>::X->addIgnored("__ehdr_start"); auto Define = [this](StringRef S, DefinedRegular<ELFT> *&Sym1, DefinedRegular<ELFT> *&Sym2) { - Sym1 = Symtab.addIgnored(S, STV_DEFAULT); + Sym1 = Symtab<ELFT>::X->addIgnored(S, STV_DEFAULT); // The name without the underscore is not a reserved name, // so it is defined only when there is a reference against it. assert(S.startswith("_")); S = S.substr(1); - if (SymbolBody *B = Symtab.find(S)) + if (SymbolBody *B = Symtab<ELFT>::X->find(S)) if (B->isUndefined()) - Sym2 = Symtab.addAbsolute(S, STV_DEFAULT); + Sym2 = Symtab<ELFT>::X->addAbsolute(S, STV_DEFAULT); }; Define("_end", ElfSym<ELFT>::End, ElfSym<ELFT>::End2); @@ -592,65 +752,239 @@ template <class ELFT> void Writer<ELFT>::addReservedSymbols() { // Sort input sections by section name suffixes for // __attribute__((init_priority(N))). -template <class ELFT> static void sortInitFini(OutputSectionBase<ELFT> *S) { +template <class ELFT> static void sortInitFini(OutputSectionBase *S) { if (S) reinterpret_cast<OutputSection<ELFT> *>(S)->sortInitFini(); } // Sort input sections by the special rule for .ctors and .dtors. 
-template <class ELFT> static void sortCtorsDtors(OutputSectionBase<ELFT> *S) { +template <class ELFT> static void sortCtorsDtors(OutputSectionBase *S) { if (S) reinterpret_cast<OutputSection<ELFT> *>(S)->sortCtorsDtors(); } -// Create output section objects and add them to OutputSections. -template <class ELFT> void Writer<ELFT>::createSections() { - // Create output sections for input object file sections. - std::vector<OutputSectionBase<ELFT> *> RegularSections; - OutputSectionFactory<ELFT> Factory; - for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F : - Symtab.getObjectFiles()) { - for (InputSectionBase<ELFT> *C : F->getSections()) { - if (isDiscarded(C)) { - reportDiscarded(C, F); +// Sort input sections using the list provided by --symbol-ordering-file. +template <class ELFT> +static void sortBySymbolsOrder(ArrayRef<OutputSectionBase *> OutputSections) { + if (Config->SymbolOrderingFile.empty()) + return; + + // Build a map from symbols to their priorities. Symbols that didn't + // appear in the symbol ordering file have the lowest priority 0. + // All explicitly mentioned symbols have negative (higher) priorities. + DenseMap<StringRef, int> SymbolOrder; + int Priority = -Config->SymbolOrderingFile.size(); + for (StringRef S : Config->SymbolOrderingFile) + SymbolOrder.insert({S, Priority++}); + + // Build a map from sections to their priorities. 
+ DenseMap<InputSectionBase<ELFT> *, int> SectionOrder; + for (elf::ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) { + for (SymbolBody *Body : File->getSymbols()) { + auto *D = dyn_cast<DefinedRegular<ELFT>>(Body); + if (!D || !D->Section) continue; - } - OutputSectionBase<ELFT> *Sec; - bool IsNew; - std::tie(Sec, IsNew) = Factory.create(C, getOutputSectionName(C)); - if (IsNew) { - OwningSections.emplace_back(Sec); - OutputSections.push_back(Sec); - RegularSections.push_back(Sec); - } - Sec->addSection(C); + int &Priority = SectionOrder[D->Section]; + Priority = std::min(Priority, SymbolOrder.lookup(D->getName())); } } - // If we have a .opd section (used under PPC64 for function descriptors), - // store a pointer to it here so that we can use it later when processing - // relocations. - Out<ELFT>::Opd = Factory.lookup(".opd", SHT_PROGBITS, SHF_WRITE | SHF_ALLOC); + // Sort sections by priority. + for (OutputSectionBase *Base : OutputSections) + if (auto *Sec = dyn_cast<OutputSection<ELFT>>(Base)) + Sec->sort([&](InputSection<ELFT> *S) { return SectionOrder.lookup(S); }); +} + +template <class ELFT> +void Writer<ELFT>::forEachRelSec( + std::function<void(InputSectionBase<ELFT> &)> Fn) { + for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) { + if (!IS->Live) + continue; + // Scan all relocations. Each relocation goes through a series + // of tests to determine if it needs special treatment, such as + // creating GOT, PLT, copy relocations, etc. + // Note that relocations for non-alloc sections are directly + // processed by InputSection::relocateNonAlloc. 
+ if (!(IS->Flags & SHF_ALLOC)) + continue; + if (isa<InputSection<ELFT>>(IS) || isa<EhInputSection<ELFT>>(IS)) + Fn(*IS); + } +} + +template <class ELFT> +void Writer<ELFT>::addInputSec(InputSectionBase<ELFT> *IS) { + if (!IS) + return; + + if (!IS->Live) { + reportDiscarded(IS); + return; + } + OutputSectionBase *Sec; + bool IsNew; + StringRef OutsecName = getOutputSectionName(IS->Name); + std::tie(Sec, IsNew) = Factory.create(IS, OutsecName); + if (IsNew) + OutputSections.push_back(Sec); + Sec->addSection(IS); +} + +template <class ELFT> void Writer<ELFT>::createSections() { + for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) + addInputSec(IS); + + sortBySymbolsOrder<ELFT>(OutputSections); + sortInitFini<ELFT>(findSection(".init_array")); + sortInitFini<ELFT>(findSection(".fini_array")); + sortCtorsDtors<ELFT>(findSection(".ctors")); + sortCtorsDtors<ELFT>(findSection(".dtors")); + + for (OutputSectionBase *Sec : OutputSections) + Sec->assignOffsets(); +} + +template <class ELFT> +static bool canSharePtLoad(const OutputSectionBase &S1, + const OutputSectionBase &S2) { + if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC)) + return false; + + bool S1IsWrite = S1.Flags & SHF_WRITE; + bool S2IsWrite = S2.Flags & SHF_WRITE; + if (S1IsWrite != S2IsWrite) + return false; + + if (!S1IsWrite) + return true; // RO and RX share a PT_LOAD with linker scripts. + return (S1.Flags & SHF_EXECINSTR) == (S2.Flags & SHF_EXECINSTR); +} + +template <class ELFT> void Writer<ELFT>::sortSections() { + // Don't sort if using -r. It is not necessary and we want to preserve the + // relative order for SHF_LINK_ORDER sections. + if (Config->Relocatable) + return; + if (!ScriptConfig->HasSections) { + std::stable_sort(OutputSections.begin(), OutputSections.end(), + compareSectionsNonScript<ELFT>); + return; + } + Script<ELFT>::X->adjustSectionsBeforeSorting(); + + // The order of the sections in the script is arbitrary and may not agree with + // compareSectionsNonScript. 
This means that we cannot easily define a + // strict weak ordering. To see why, consider a comparison of a section in the + // script and one not in the script. We have a two simple options: + // * Make them equivalent (a is not less than b, and b is not less than a). + // The problem is then that equivalence has to be transitive and we can + // have sections a, b and c with only b in a script and a less than c + // which breaks this property. + // * Use compareSectionsNonScript. Given that the script order doesn't have + // to match, we can end up with sections a, b, c, d where b and c are in the + // script and c is compareSectionsNonScript less than b. In which case d + // can be equivalent to c, a to b and d < a. As a concrete example: + // .a (rx) # not in script + // .b (rx) # in script + // .c (ro) # in script + // .d (ro) # not in script + // + // The way we define an order then is: + // * First put script sections at the start and sort the script and + // non-script sections independently. + // * Move each non-script section to its preferred position. We try + // to put each section in the last position where it it can share + // a PT_LOAD. 
+ + std::stable_sort(OutputSections.begin(), OutputSections.end(), + compareSections<ELFT>); + + auto I = OutputSections.begin(); + auto E = OutputSections.end(); + auto NonScriptI = + std::find_if(OutputSections.begin(), E, [](OutputSectionBase *S) { + return Script<ELFT>::X->getSectionIndex(S->getName()) == INT_MAX; + }); + while (NonScriptI != E) { + auto BestPos = std::max_element( + I, NonScriptI, [&](OutputSectionBase *&A, OutputSectionBase *&B) { + bool ACanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *A); + bool BCanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *B); + if (ACanSharePtLoad != BCanSharePtLoad) + return BCanSharePtLoad; + + bool ACmp = compareSectionsNonScript<ELFT>(*NonScriptI, A); + bool BCmp = compareSectionsNonScript<ELFT>(*NonScriptI, B); + if (ACmp != BCmp) + return BCmp; // FIXME: missing test + + size_t PosA = &A - &OutputSections[0]; + size_t PosB = &B - &OutputSections[0]; + return ACmp ? PosA > PosB : PosA < PosB; + }); + + // max_element only returns NonScriptI if the range is empty. If the range + // is not empty we should consider moving the the element forward one + // position. + if (BestPos != NonScriptI && + !compareSectionsNonScript<ELFT>(*NonScriptI, *BestPos)) + ++BestPos; + std::rotate(BestPos, NonScriptI, NonScriptI + 1); + ++NonScriptI; + } + + Script<ELFT>::X->adjustSectionsAfterSorting(); +} + +template <class ELFT> +static void +finalizeSynthetic(const std::vector<SyntheticSection<ELFT> *> &Sections) { + for (SyntheticSection<ELFT> *SS : Sections) + if (SS && SS->OutSec && !SS->empty()) { + SS->finalize(); + SS->OutSec->Size = 0; + SS->OutSec->assignOffsets(); + } +} + +// We need to add input synthetic sections early in createSyntheticSections() +// to make them visible from linkescript side. But not all sections are always +// required to be in output. For example we don't need dynamic section content +// sometimes. This function filters out such unused sections from output. 
+template <class ELFT> +static void removeUnusedSyntheticSections(std::vector<OutputSectionBase *> &V) { + // Input synthetic sections are placed after all regular ones. We iterate over + // them all and exit at first non-synthetic. + for (InputSectionBase<ELFT> *S : llvm::reverse(Symtab<ELFT>::X->Sections)) { + SyntheticSection<ELFT> *SS = dyn_cast<SyntheticSection<ELFT>>(S); + if (!SS) + return; + if (!SS->empty() || !SS->OutSec) + continue; - Out<ELFT>::Dynamic->PreInitArraySec = Factory.lookup( - ".preinit_array", SHT_PREINIT_ARRAY, SHF_WRITE | SHF_ALLOC); - Out<ELFT>::Dynamic->InitArraySec = - Factory.lookup(".init_array", SHT_INIT_ARRAY, SHF_WRITE | SHF_ALLOC); - Out<ELFT>::Dynamic->FiniArraySec = - Factory.lookup(".fini_array", SHT_FINI_ARRAY, SHF_WRITE | SHF_ALLOC); + OutputSection<ELFT> *OutSec = cast<OutputSection<ELFT>>(SS->OutSec); + OutSec->Sections.erase( + std::find(OutSec->Sections.begin(), OutSec->Sections.end(), SS)); + // If there is no other sections in output section, remove it from output. + if (OutSec->Sections.empty()) + V.erase(std::find(V.begin(), V.end(), OutSec)); + } +} - // Sort section contents for __attribute__((init_priority(N)). - sortInitFini(Out<ELFT>::Dynamic->InitArraySec); - sortInitFini(Out<ELFT>::Dynamic->FiniArraySec); - sortCtorsDtors(Factory.lookup(".ctors", SHT_PROGBITS, SHF_WRITE | SHF_ALLOC)); - sortCtorsDtors(Factory.lookup(".dtors", SHT_PROGBITS, SHF_WRITE | SHF_ALLOC)); +// Create output section objects and add them to OutputSections. +template <class ELFT> void Writer<ELFT>::finalizeSections() { + Out<ELFT>::DebugInfo = findSection(".debug_info"); + Out<ELFT>::PreinitArray = findSection(".preinit_array"); + Out<ELFT>::InitArray = findSection(".init_array"); + Out<ELFT>::FiniArray = findSection(".fini_array"); // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop // symbols for sections, so that the runtime can get the start and end // addresses of each section by section name. Add such symbols. 
if (!Config->Relocatable) { addStartEndSymbols(); - for (OutputSectionBase<ELFT> *Sec : RegularSections) + for (OutputSectionBase *Sec : OutputSections) addStartStopSymbols(Sec); } @@ -658,16 +992,12 @@ template <class ELFT> void Writer<ELFT>::createSections() { // It should be okay as no one seems to care about the type. // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html - if (isOutputDynamic()) - Symtab.addSynthetic("_DYNAMIC", Out<ELFT>::Dynamic, 0); + if (In<ELFT>::DynSymTab) + addRegular("_DYNAMIC", In<ELFT>::Dynamic, 0); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); - // Add scripted symbols with zero values now. - // Real values will be assigned later - Script<ELFT>::X->addScriptedSymbols(); - if (!Out<ELFT>::EhFrame->empty()) { OutputSections.push_back(Out<ELFT>::EhFrame); Out<ELFT>::EhFrame->finalize(); @@ -675,190 +1005,102 @@ template <class ELFT> void Writer<ELFT>::createSections() { // Scan relocations. This must be done after every symbol is declared so that // we can correctly decide if a dynamic relocation is needed. - for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F : - Symtab.getObjectFiles()) { - for (InputSectionBase<ELFT> *C : F->getSections()) { - if (isDiscarded(C)) - continue; - if (auto *S = dyn_cast<InputSection<ELFT>>(C)) { - scanRelocations(*S); - continue; - } - if (auto *S = dyn_cast<EhInputSection<ELFT>>(C)) - if (S->RelocSection) - scanRelocations(*S, *S->RelocSection); - } - } - - for (OutputSectionBase<ELFT> *Sec : OutputSections) - Sec->assignOffsets(); + forEachRelSec(scanRelocations<ELFT>); // Now that we have defined all possible symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. 
- std::vector<DefinedCommon *> CommonSymbols; - for (Symbol *S : Symtab.getSymbols()) { + for (Symbol *S : Symtab<ELFT>::X->getSymbols()) { SymbolBody *Body = S->body(); - // We only report undefined symbols in regular objects. This means that we - // will accept an undefined reference in bitcode if it can be optimized out. - if (S->IsUsedInRegularObj && Body->isUndefined() && !S->isWeak()) - reportUndefined<ELFT>(Symtab, Body); - - if (auto *C = dyn_cast<DefinedCommon>(Body)) - CommonSymbols.push_back(C); - if (!includeInSymtab<ELFT>(*Body)) continue; - if (Out<ELFT>::SymTab) - Out<ELFT>::SymTab->addSymbol(Body); + if (In<ELFT>::SymTab) + In<ELFT>::SymTab->addSymbol(Body); - if (isOutputDynamic() && S->includeInDynsym()) { - Out<ELFT>::DynSymTab->addSymbol(Body); + if (In<ELFT>::DynSymTab && S->includeInDynsym()) { + In<ELFT>::DynSymTab->addSymbol(Body); if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(Body)) if (SS->file()->isNeeded()) - Out<ELFT>::VerNeed->addSymbol(SS); + In<ELFT>::VerNeed->addSymbol(SS); } } // Do not proceed if there was an undefined symbol. - if (HasError) + if (ErrorCount) return; - addCommonSymbols(CommonSymbols); - // So far we have added sections from input object files. // This function adds linker-created Out<ELFT>::* sections. addPredefinedSections(); + removeUnusedSyntheticSections<ELFT>(OutputSections); - std::stable_sort(OutputSections.begin(), OutputSections.end(), - compareSections<ELFT>); + sortSections(); unsigned I = 1; - for (OutputSectionBase<ELFT> *Sec : OutputSections) { + for (OutputSectionBase *Sec : OutputSections) { Sec->SectionIndex = I++; - Sec->setSHName(Out<ELFT>::ShStrTab->addString(Sec->getName())); + Sec->ShName = In<ELFT>::ShStrTab->addString(Sec->getName()); } - // Finalizers fix each section's size. - // .dynsym is finalized early since that may fill up .gnu.hash. - if (isOutputDynamic()) - Out<ELFT>::DynSymTab->finalize(); + // Binary and relocatable output does not have PHDRS. 
+ // The headers have to be created before finalize as that can influence the + // image base and the dynamic section on mips includes the image base. + if (!Config->Relocatable && !Config->OFormatBinary) { + Phdrs = Script<ELFT>::X->hasPhdrsCommands() ? Script<ELFT>::X->createPhdrs() + : createPhdrs(); + addPtArmExid(Phdrs); + fixHeaders(); + } // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result - // of finalizing other sections. The dynamic string table is - // finalized once the .dynamic finalizer has added a few last - // strings. See DynamicSection::finalize() - for (OutputSectionBase<ELFT> *Sec : OutputSections) - if (Sec != Out<ELFT>::DynStrTab && Sec != Out<ELFT>::Dynamic) - Sec->finalize(); - - if (isOutputDynamic()) - Out<ELFT>::Dynamic->finalize(); - - // Now that all output offsets are fixed. Finalize mergeable sections - // to fix their maps from input offsets to output offsets. - for (OutputSectionBase<ELFT> *Sec : OutputSections) - Sec->finalizePieces(); + // of finalizing other sections. + for (OutputSectionBase *Sec : OutputSections) + Sec->finalize(); + + // Dynamic section must be the last one in this list and dynamic + // symbol table section (DynSymTab) must be the first one. 
+ finalizeSynthetic<ELFT>( + {In<ELFT>::DynSymTab, In<ELFT>::GnuHashTab, In<ELFT>::HashTab, + In<ELFT>::SymTab, In<ELFT>::ShStrTab, In<ELFT>::StrTab, + In<ELFT>::VerDef, In<ELFT>::DynStrTab, In<ELFT>::GdbIndex, + In<ELFT>::Got, In<ELFT>::MipsGot, In<ELFT>::IgotPlt, + In<ELFT>::GotPlt, In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, + In<ELFT>::RelaPlt, In<ELFT>::Plt, In<ELFT>::Iplt, + In<ELFT>::Plt, In<ELFT>::EhFrameHdr, In<ELFT>::VerSym, + In<ELFT>::VerNeed, In<ELFT>::Dynamic}); } -template <class ELFT> bool Writer<ELFT>::needsGot() { - if (!Out<ELFT>::Got->empty()) - return true; - - // We add the .got section to the result for dynamic MIPS target because - // its address and properties are mentioned in the .dynamic section. - if (Config->EMachine == EM_MIPS) - return true; - - // If we have a relocation that is relative to GOT (such as GOTOFFREL), - // we need to emit a GOT even if it's empty. - return Out<ELFT>::Got->HasGotOffRel; -} - -// This function add Out<ELFT>::* sections to OutputSections. template <class ELFT> void Writer<ELFT>::addPredefinedSections() { - auto Add = [&](OutputSectionBase<ELFT> *C) { - if (C) - OutputSections.push_back(C); - }; - - // A core file does not usually contain unmodified segments except - // the first page of the executable. Add the build ID section to beginning of - // the file so that the section is included in the first page. - if (Out<ELFT>::BuildId) - OutputSections.insert(OutputSections.begin(), Out<ELFT>::BuildId); - - // Add .interp at first because some loaders want to see that section - // on the first page of the executable file when loaded into memory. - if (needsInterpSection()) - OutputSections.insert(OutputSections.begin(), Out<ELFT>::Interp); - - // This order is not the same as the final output order - // because we sort the sections using their attributes below. 
- Add(Out<ELFT>::SymTab); - Add(Out<ELFT>::ShStrTab); - Add(Out<ELFT>::StrTab); - if (isOutputDynamic()) { - Add(Out<ELFT>::DynSymTab); - - bool HasVerNeed = Out<ELFT>::VerNeed->getNeedNum() != 0; - if (Out<ELFT>::VerDef || HasVerNeed) - Add(Out<ELFT>::VerSym); - Add(Out<ELFT>::VerDef); - if (HasVerNeed) - Add(Out<ELFT>::VerNeed); - - Add(Out<ELFT>::GnuHashTab); - Add(Out<ELFT>::HashTab); - Add(Out<ELFT>::Dynamic); - Add(Out<ELFT>::DynStrTab); - if (Out<ELFT>::RelaDyn->hasRelocs()) - Add(Out<ELFT>::RelaDyn); - Add(Out<ELFT>::MipsRldMap); - } + if (Out<ELFT>::Bss->Size > 0) + OutputSections.push_back(Out<ELFT>::Bss); - // We always need to add rel[a].plt to output if it has entries. - // Even during static linking it can contain R_[*]_IRELATIVE relocations. - if (Out<ELFT>::RelaPlt && Out<ELFT>::RelaPlt->hasRelocs()) { - Add(Out<ELFT>::RelaPlt); - Out<ELFT>::RelaPlt->Static = !isOutputDynamic(); - } + auto OS = dyn_cast_or_null<OutputSection<ELFT>>(findSection(".ARM.exidx")); + if (OS && !OS->Sections.empty() && !Config->Relocatable) + OS->addSection(make<ARMExidxSentinelSection<ELFT>>()); - if (needsGot()) - Add(Out<ELFT>::Got); - if (Out<ELFT>::GotPlt && !Out<ELFT>::GotPlt->empty()) - Add(Out<ELFT>::GotPlt); - if (!Out<ELFT>::Plt->empty()) - Add(Out<ELFT>::Plt); - if (!Out<ELFT>::EhFrame->empty()) - Add(Out<ELFT>::EhFrameHdr); - if (Out<ELFT>::Bss->getSize() > 0) - Add(Out<ELFT>::Bss); + addInputSec(In<ELFT>::SymTab); + addInputSec(In<ELFT>::ShStrTab); + addInputSec(In<ELFT>::StrTab); } // The linker is expected to define SECNAME_start and SECNAME_end // symbols for a few sections. This function defines them. 
template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { - auto Define = [&](StringRef Start, StringRef End, - OutputSectionBase<ELFT> *OS) { - if (OS) { - this->Symtab.addSynthetic(Start, OS, 0); - this->Symtab.addSynthetic(End, OS, DefinedSynthetic<ELFT>::SectionEnd); - } else { - addOptionalSynthetic(this->Symtab, Start, - (OutputSectionBase<ELFT> *)nullptr, 0); - addOptionalSynthetic(this->Symtab, End, - (OutputSectionBase<ELFT> *)nullptr, 0); - } + auto Define = [&](StringRef Start, StringRef End, OutputSectionBase *OS) { + // These symbols resolve to the image base if the section does not exist. + // A special value -1 indicates end of the section. + addOptionalSynthetic<ELFT>(Start, OS, 0); + addOptionalSynthetic<ELFT>(End, OS, OS ? -1 : 0); }; Define("__preinit_array_start", "__preinit_array_end", - Out<ELFT>::Dynamic->PreInitArraySec); - Define("__init_array_start", "__init_array_end", - Out<ELFT>::Dynamic->InitArraySec); - Define("__fini_array_start", "__fini_array_end", - Out<ELFT>::Dynamic->FiniArraySec); + Out<ELFT>::PreinitArray); + Define("__init_array_start", "__init_array_end", Out<ELFT>::InitArray); + Define("__fini_array_start", "__fini_array_end", Out<ELFT>::FiniArray); + + if (OutputSectionBase *Sec = findSection(".ARM.exidx")) + Define("__exidx_start", "__exidx_end", Sec); } // If a section name is valid as a C identifier (which is rare because of @@ -867,145 +1109,181 @@ template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { // respectively. This is not requested by the ELF standard, but GNU ld and // gold provide the feature, and used by many programs. 
template <class ELFT> -void Writer<ELFT>::addStartStopSymbols(OutputSectionBase<ELFT> *Sec) { +void Writer<ELFT>::addStartStopSymbols(OutputSectionBase *Sec) { StringRef S = Sec->getName(); if (!isValidCIdentifier(S)) return; - StringSaver Saver(Alloc); - StringRef Start = Saver.save("__start_" + S); - StringRef Stop = Saver.save("__stop_" + S); - if (SymbolBody *B = Symtab.find(Start)) - if (B->isUndefined()) - Symtab.addSynthetic(Start, Sec, 0); - if (SymbolBody *B = Symtab.find(Stop)) - if (B->isUndefined()) - Symtab.addSynthetic(Stop, Sec, DefinedSynthetic<ELFT>::SectionEnd); + addOptionalSynthetic<ELFT>(Saver.save("__start_" + S), Sec, 0, STV_DEFAULT); + addOptionalSynthetic<ELFT>(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); +} + +template <class ELFT> +OutputSectionBase *Writer<ELFT>::findSection(StringRef Name) { + for (OutputSectionBase *Sec : OutputSections) + if (Sec->getName() == Name) + return Sec; + return nullptr; } -template <class ELFT> static bool needsPtLoad(OutputSectionBase<ELFT> *Sec) { - if (!(Sec->getFlags() & SHF_ALLOC)) +template <class ELFT> static bool needsPtLoad(OutputSectionBase *Sec) { + if (!(Sec->Flags & SHF_ALLOC)) return false; // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is // responsible for allocating space for them, not the PT_LOAD that // contains the TLS initialization image. - if (Sec->getFlags() & SHF_TLS && Sec->getType() == SHT_NOBITS) + if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) return false; return true; } -static uint32_t toPhdrFlags(uint64_t Flags) { - uint32_t Ret = PF_R; - if (Flags & SHF_WRITE) - Ret |= PF_W; - if (Flags & SHF_EXECINSTR) - Ret |= PF_X; - return Ret; +// Linker scripts are responsible for aligning addresses. Unfortunately, most +// linker scripts are designed for creating two PT_LOADs only, one RX and one +// RW. This means that there is no alignment in the RO to RX transition and we +// cannot create a PT_LOAD there. 
+template <class ELFT> +static typename ELFT::uint computeFlags(typename ELFT::uint F) { + if (Config->OMagic) + return PF_R | PF_W | PF_X; + if (Config->SingleRoRx && !(F & PF_W)) + return F | PF_X; + return F; } // Decide which program headers to create and which sections to include in each // one. -template <class ELFT> void Writer<ELFT>::createPhdrs() { - auto AddHdr = [this](unsigned Type, unsigned Flags) { - return &*Phdrs.emplace(Phdrs.end(), Type, Flags); - }; - - auto AddSec = [](Phdr &Hdr, OutputSectionBase<ELFT> *Sec) { - Hdr.Last = Sec; - if (!Hdr.First) - Hdr.First = Sec; - Hdr.H.p_align = std::max<uintX_t>(Hdr.H.p_align, Sec->getAlignment()); +template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { + std::vector<PhdrEntry> Ret; + auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * { + Ret.emplace_back(Type, Flags); + return &Ret.back(); }; // The first phdr entry is PT_PHDR which describes the program header itself. - Phdr &Hdr = *AddHdr(PT_PHDR, PF_R); - AddSec(Hdr, Out<ELFT>::ProgramHeaders); + PhdrEntry &Hdr = *AddHdr(PT_PHDR, PF_R); + Hdr.add(Out<ELFT>::ProgramHeaders); // PT_INTERP must be the second entry if exists. - if (needsInterpSection()) { - Phdr &Hdr = *AddHdr(PT_INTERP, toPhdrFlags(Out<ELFT>::Interp->getFlags())); - AddSec(Hdr, Out<ELFT>::Interp); + if (OutputSectionBase *Sec = findSection(".interp")) { + PhdrEntry &Hdr = *AddHdr(PT_INTERP, Sec->getPhdrFlags()); + Hdr.add(Sec); } // Add the first PT_LOAD segment for regular output sections. 
- uintX_t Flags = PF_R; - Phdr *Load = AddHdr(PT_LOAD, Flags); - AddSec(*Load, Out<ELFT>::ElfHeader); - AddSec(*Load, Out<ELFT>::ProgramHeaders); - - Phdr TlsHdr(PT_TLS, PF_R); - Phdr RelRo(PT_GNU_RELRO, PF_R); - Phdr Note(PT_NOTE, PF_R); - for (OutputSectionBase<ELFT> *Sec : OutputSections) { - if (!(Sec->getFlags() & SHF_ALLOC)) + uintX_t Flags = computeFlags<ELFT>(PF_R); + PhdrEntry *Load = AddHdr(PT_LOAD, Flags); + + PhdrEntry TlsHdr(PT_TLS, PF_R); + PhdrEntry RelRo(PT_GNU_RELRO, PF_R); + PhdrEntry Note(PT_NOTE, PF_R); + for (OutputSectionBase *Sec : OutputSections) { + if (!(Sec->Flags & SHF_ALLOC)) break; // If we meet TLS section then we create TLS header - // and put all TLS sections inside for futher use when + // and put all TLS sections inside for further use when // assign addresses. - if (Sec->getFlags() & SHF_TLS) - AddSec(TlsHdr, Sec); + if (Sec->Flags & SHF_TLS) + TlsHdr.add(Sec); if (!needsPtLoad<ELFT>(Sec)) continue; - // If flags changed then we want new load segment. - uintX_t NewFlags = toPhdrFlags(Sec->getFlags()); - if (Flags != NewFlags) { + // Segments are contiguous memory regions that has the same attributes + // (e.g. executable or writable). There is one phdr for each segment. + // Therefore, we need to create a new phdr when the next section has + // different flags or is loaded at a discontiguous address using AT linker + // script command. + uintX_t NewFlags = computeFlags<ELFT>(Sec->getPhdrFlags()); + if (Script<ELFT>::X->hasLMA(Sec->getName()) || Flags != NewFlags) { Load = AddHdr(PT_LOAD, NewFlags); Flags = NewFlags; } - AddSec(*Load, Sec); + Load->add(Sec); - if (isRelroSection(Sec)) - AddSec(RelRo, Sec); - if (Sec->getType() == SHT_NOTE) - AddSec(Note, Sec); + if (isRelroSection<ELFT>(Sec)) + RelRo.add(Sec); + if (Sec->Type == SHT_NOTE) + Note.add(Sec); } // Add the TLS segment unless it's empty. if (TlsHdr.First) - Phdrs.push_back(std::move(TlsHdr)); + Ret.push_back(std::move(TlsHdr)); // Add an entry for .dynamic. 
- if (isOutputDynamic()) { - Phdr &H = *AddHdr(PT_DYNAMIC, toPhdrFlags(Out<ELFT>::Dynamic->getFlags())); - AddSec(H, Out<ELFT>::Dynamic); + if (In<ELFT>::DynSymTab) { + PhdrEntry &H = + *AddHdr(PT_DYNAMIC, In<ELFT>::Dynamic->OutSec->getPhdrFlags()); + H.add(In<ELFT>::Dynamic->OutSec); } // PT_GNU_RELRO includes all sections that should be marked as // read-only by dynamic linker after proccessing relocations. if (RelRo.First) - Phdrs.push_back(std::move(RelRo)); + Ret.push_back(std::move(RelRo)); // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr. - if (!Out<ELFT>::EhFrame->empty() && Out<ELFT>::EhFrameHdr) { - Phdr &Hdr = *AddHdr(PT_GNU_EH_FRAME, - toPhdrFlags(Out<ELFT>::EhFrameHdr->getFlags())); - AddSec(Hdr, Out<ELFT>::EhFrameHdr); + if (!Out<ELFT>::EhFrame->empty() && In<ELFT>::EhFrameHdr) { + PhdrEntry &Hdr = + *AddHdr(PT_GNU_EH_FRAME, In<ELFT>::EhFrameHdr->OutSec->getPhdrFlags()); + Hdr.add(In<ELFT>::EhFrameHdr->OutSec); + } + + // PT_OPENBSD_RANDOMIZE specifies the location and size of a part of the + // memory image of the program that must be filled with random data before any + // code in the object is executed. + if (OutputSectionBase *Sec = findSection(".openbsd.randomdata")) { + PhdrEntry &Hdr = *AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags()); + Hdr.add(Sec); } // PT_GNU_STACK is a special section to tell the loader to make the // pages for the stack non-executable. - if (!Config->ZExecStack) - AddHdr(PT_GNU_STACK, PF_R | PF_W); + if (!Config->ZExecstack) { + PhdrEntry &Hdr = *AddHdr(PT_GNU_STACK, PF_R | PF_W); + if (Config->ZStackSize != uint64_t(-1)) + Hdr.p_memsz = Config->ZStackSize; + } + + // PT_OPENBSD_WXNEEDED is a OpenBSD-specific header to mark the executable + // is expected to perform W^X violations, such as calling mprotect(2) or + // mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by default on + // OpenBSD. 
+ if (Config->ZWxneeded) + AddHdr(PT_OPENBSD_WXNEEDED, PF_X); if (Note.First) - Phdrs.push_back(std::move(Note)); + Ret.push_back(std::move(Note)); + return Ret; +} + +template <class ELFT> +void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry> &Phdrs) { + if (Config->EMachine != EM_ARM) + return; + auto I = std::find_if( + OutputSections.begin(), OutputSections.end(), + [](OutputSectionBase *Sec) { return Sec->Type == SHT_ARM_EXIDX; }); + if (I == OutputSections.end()) + return; - Out<ELFT>::ProgramHeaders->setSize(sizeof(Elf_Phdr) * Phdrs.size()); + // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME + PhdrEntry ARMExidx(PT_ARM_EXIDX, PF_R); + ARMExidx.add(*I); + Phdrs.push_back(ARMExidx); } // The first section of each PT_LOAD and the first section after PT_GNU_RELRO // have to be page aligned so that the dynamic linker can set the permissions. template <class ELFT> void Writer<ELFT>::fixSectionAlignments() { - for (const Phdr &P : Phdrs) - if (P.H.p_type == PT_LOAD) + for (const PhdrEntry &P : Phdrs) + if (P.p_type == PT_LOAD && P.First) P.First->PageAlign = true; - for (const Phdr &P : Phdrs) { - if (P.H.p_type != PT_GNU_RELRO) + for (const PhdrEntry &P : Phdrs) { + if (P.p_type != PT_GNU_RELRO) continue; // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we // have to align it to a page. 
@@ -1013,43 +1291,86 @@ template <class ELFT> void Writer<ELFT>::fixSectionAlignments() { auto I = std::find(OutputSections.begin(), End, P.Last); if (I == End || (I + 1) == End) continue; - OutputSectionBase<ELFT> *Sec = *(I + 1); - if (needsPtLoad(Sec)) + OutputSectionBase *Sec = *(I + 1); + if (needsPtLoad<ELFT>(Sec)) Sec->PageAlign = true; } } +template <class ELFT> +void elf::allocateHeaders(MutableArrayRef<PhdrEntry> Phdrs, + ArrayRef<OutputSectionBase *> OutputSections) { + auto FirstPTLoad = + std::find_if(Phdrs.begin(), Phdrs.end(), + [](const PhdrEntry &E) { return E.p_type == PT_LOAD; }); + if (FirstPTLoad == Phdrs.end()) + return; + if (FirstPTLoad->First) + for (OutputSectionBase *Sec : OutputSections) + if (Sec->FirstInPtLoad == FirstPTLoad->First) + Sec->FirstInPtLoad = Out<ELFT>::ElfHeader; + FirstPTLoad->First = Out<ELFT>::ElfHeader; + if (!FirstPTLoad->Last) + FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; +} + // We should set file offsets and VAs for elf header and program headers // sections. These are special, we do not include them into output sections // list, but have them to simplify the code. template <class ELFT> void Writer<ELFT>::fixHeaders() { - uintX_t BaseVA = ScriptConfig->DoLayout ? 0 : Config->ImageBase; - Out<ELFT>::ElfHeader->setVA(BaseVA); - uintX_t Off = Out<ELFT>::ElfHeader->getSize(); - Out<ELFT>::ProgramHeaders->setVA(Off + BaseVA); + Out<ELFT>::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size(); + // If the script has SECTIONS, assignAddresses will compute the values. + if (ScriptConfig->HasSections) + return; + + uintX_t HeaderSize = getHeaderSize<ELFT>(); + // When -T<section> option is specified, lower the base to make room for those + // sections. 
+ if (!Config->SectionStartMap.empty()) { + uint64_t Min = -1; + for (const auto &P : Config->SectionStartMap) + Min = std::min(Min, P.second); + if (HeaderSize < Min) + Min -= HeaderSize; + else + AllocateHeader = false; + if (Min < Config->ImageBase) + Config->ImageBase = alignDown(Min, Config->MaxPageSize); + } + + if (AllocateHeader) + allocateHeaders<ELFT>(Phdrs, OutputSections); + + uintX_t BaseVA = Config->ImageBase; + Out<ELFT>::ElfHeader->Addr = BaseVA; + Out<ELFT>::ProgramHeaders->Addr = BaseVA + Out<ELFT>::ElfHeader->Size; } // Assign VAs (addresses at run-time) to output sections. template <class ELFT> void Writer<ELFT>::assignAddresses() { - uintX_t VA = Config->ImageBase + Out<ELFT>::ElfHeader->getSize() + - Out<ELFT>::ProgramHeaders->getSize(); - + uintX_t VA = Config->ImageBase; + if (AllocateHeader) + VA += getHeaderSize<ELFT>(); uintX_t ThreadBssOffset = 0; - for (OutputSectionBase<ELFT> *Sec : OutputSections) { - uintX_t Alignment = Sec->getAlignment(); + for (OutputSectionBase *Sec : OutputSections) { + uintX_t Alignment = Sec->Addralign; if (Sec->PageAlign) - Alignment = std::max<uintX_t>(Alignment, Target->PageSize); + Alignment = std::max<uintX_t>(Alignment, Config->MaxPageSize); + + auto I = Config->SectionStartMap.find(Sec->getName()); + if (I != Config->SectionStartMap.end()) + VA = I->second; // We only assign VAs to allocated sections. 
if (needsPtLoad<ELFT>(Sec)) { VA = alignTo(VA, Alignment); - Sec->setVA(VA); - VA += Sec->getSize(); - } else if (Sec->getFlags() & SHF_TLS && Sec->getType() == SHT_NOBITS) { + Sec->Addr = VA; + VA += Sec->Size; + } else if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) { uintX_t TVA = VA + ThreadBssOffset; TVA = alignTo(TVA, Alignment); - Sec->setVA(TVA); - ThreadBssOffset = TVA - VA + Sec->getSize(); + Sec->Addr = TVA; + ThreadBssOffset = TVA - VA + Sec->Size; } } } @@ -1059,38 +1380,50 @@ template <class ELFT> void Writer<ELFT>::assignAddresses() { // virtual address (modulo the page size) so that the loader can load // executables without any address adjustment. template <class ELFT, class uintX_t> -static uintX_t getFileAlignment(uintX_t Off, OutputSectionBase<ELFT> *Sec) { - uintX_t Alignment = Sec->getAlignment(); - if (Sec->PageAlign) - Alignment = std::max<uintX_t>(Alignment, Target->PageSize); - Off = alignTo(Off, Alignment); - - // Relocatable output does not have program headers - // and does not need any other offset adjusting. - if (Config->Relocatable || !(Sec->getFlags() & SHF_ALLOC)) - return Off; - return alignTo(Off, Target->PageSize, Sec->getVA()); +static uintX_t getFileAlignment(uintX_t Off, OutputSectionBase *Sec) { + OutputSectionBase *First = Sec->FirstInPtLoad; + // If the section is not in a PT_LOAD, we just have to align it. + if (!First) + return alignTo(Off, Sec->Addralign); + + // The first section in a PT_LOAD has to have congruent offset and address + // module the page size. + if (Sec == First) + return alignTo(Off, Config->MaxPageSize, Sec->Addr); + + // If two sections share the same PT_LOAD the file offset is calculated + // using this formula: Off2 = Off1 + (VA2 - VA1). 
+ return First->Offset + Sec->Addr - First->Addr; +} + +template <class ELFT, class uintX_t> +void setOffset(OutputSectionBase *Sec, uintX_t &Off) { + if (Sec->Type == SHT_NOBITS) { + Sec->Offset = Off; + return; + } + + Off = getFileAlignment<ELFT>(Off, Sec); + Sec->Offset = Off; + Off += Sec->Size; +} + +template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() { + uintX_t Off = 0; + for (OutputSectionBase *Sec : OutputSections) + if (Sec->Flags & SHF_ALLOC) + setOffset<ELFT>(Sec, Off); + FileSize = alignTo(Off, sizeof(uintX_t)); } // Assign file offsets to output sections. template <class ELFT> void Writer<ELFT>::assignFileOffsets() { uintX_t Off = 0; + setOffset<ELFT>(Out<ELFT>::ElfHeader, Off); + setOffset<ELFT>(Out<ELFT>::ProgramHeaders, Off); - auto Set = [&](OutputSectionBase<ELFT> *Sec) { - if (Sec->getType() == SHT_NOBITS) { - Sec->setFileOffset(Off); - return; - } - - Off = getFileAlignment<ELFT>(Off, Sec); - Sec->setFileOffset(Off); - Off += Sec->getSize(); - }; - - Set(Out<ELFT>::ElfHeader); - Set(Out<ELFT>::ProgramHeaders); - for (OutputSectionBase<ELFT> *Sec : OutputSections) - Set(Sec); + for (OutputSectionBase *Sec : OutputSections) + setOffset<ELFT>(Sec, Off); SectionHeaderOff = alignTo(Off, sizeof(uintX_t)); FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr); @@ -1099,50 +1432,62 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() { // Finalize the program headers. We call this function after we assign // file offsets and VAs to all sections. 
template <class ELFT> void Writer<ELFT>::setPhdrs() { - for (Phdr &P : Phdrs) { - Elf_Phdr &H = P.H; - OutputSectionBase<ELFT> *First = P.First; - OutputSectionBase<ELFT> *Last = P.Last; + for (PhdrEntry &P : Phdrs) { + OutputSectionBase *First = P.First; + OutputSectionBase *Last = P.Last; if (First) { - H.p_filesz = Last->getFileOff() - First->getFileOff(); - if (Last->getType() != SHT_NOBITS) - H.p_filesz += Last->getSize(); - H.p_memsz = Last->getVA() + Last->getSize() - First->getVA(); - H.p_offset = First->getFileOff(); - H.p_vaddr = First->getVA(); + P.p_filesz = Last->Offset - First->Offset; + if (Last->Type != SHT_NOBITS) + P.p_filesz += Last->Size; + P.p_memsz = Last->Addr + Last->Size - First->Addr; + P.p_offset = First->Offset; + P.p_vaddr = First->Addr; + if (!P.HasLMA) + P.p_paddr = First->getLMA(); } - if (H.p_type == PT_LOAD) - H.p_align = Target->PageSize; - else if (H.p_type == PT_GNU_RELRO) - H.p_align = 1; - H.p_paddr = H.p_vaddr; + if (P.p_type == PT_LOAD) + P.p_align = Config->MaxPageSize; + else if (P.p_type == PT_GNU_RELRO) + P.p_align = 1; // The TLS pointer goes after PT_TLS. At least glibc will align it, // so round up the size to make sure the offsets are correct. - if (H.p_type == PT_TLS) { - Out<ELFT>::TlsPhdr = &H; - H.p_memsz = alignTo(H.p_memsz, H.p_align); + if (P.p_type == PT_TLS) { + Out<ELFT>::TlsPhdr = &P; + if (P.p_memsz) + P.p_memsz = alignTo(P.p_memsz, P.p_align); } } } -static uint32_t getMipsEFlags(bool Is64Bits) { - // FIXME: In fact ELF flags depends on ELF flags of input object files - // and selected emulation. For now just use hard coded values. - if (Is64Bits) - return EF_MIPS_CPIC | EF_MIPS_PIC | EF_MIPS_ARCH_64R2; - - uint32_t V = EF_MIPS_CPIC | EF_MIPS_ABI_O32 | EF_MIPS_ARCH_32R2; - if (Config->Shared) - V |= EF_MIPS_PIC; - return V; -} +// The entry point address is chosen in the following ways. +// +// 1. the '-e' entry command-line option; +// 2. the ENTRY(symbol) command in a linker control script; +// 3. 
the value of the symbol start, if present; +// 4. the address of the first byte of the .text section, if present; +// 5. the address 0. +template <class ELFT> typename ELFT::uint Writer<ELFT>::getEntryAddr() { + // Case 1, 2 or 3. As a special case, if the symbol is actually + // a number, we'll use that number as an address. + if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Entry)) + return B->getVA<ELFT>(); + uint64_t Addr; + if (!Config->Entry.getAsInteger(0, Addr)) + return Addr; + + // Case 4 + if (OutputSectionBase *Sec = findSection(".text")) { + if (Config->WarnMissingEntry) + warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + + utohexstr(Sec->Addr)); + return Sec->Addr; + } -template <class ELFT> static typename ELFT::uint getEntryAddr() { - if (Symbol *S = Config->EntrySym) - return S->body()->getVA<ELFT>(); - if (Config->EntryAddr != uint64_t(-1)) - return Config->EntryAddr; + // Case 5 + if (Config->WarnMissingEntry) + warn("cannot find entry symbol " + Config->Entry + + "; not setting start address"); return 0; } @@ -1164,6 +1509,10 @@ static uint16_t getELFType() { // to each section. This function fixes some predefined absolute // symbol values that depend on section address and size. template <class ELFT> void Writer<ELFT>::fixAbsoluteSymbols() { + // __ehdr_start is the location of program headers. + if (ElfSym<ELFT>::EhdrStart) + ElfSym<ELFT>::EhdrStart->Value = Out<ELFT>::ProgramHeaders->Addr; + auto Set = [](DefinedRegular<ELFT> *S1, DefinedRegular<ELFT> *S2, uintX_t V) { if (S1) S1->Value = V; @@ -1174,45 +1523,66 @@ template <class ELFT> void Writer<ELFT>::fixAbsoluteSymbols() { // _etext is the first location after the last read-only loadable segment. // _edata is the first location after the last read-write loadable segment. // _end is the first location after the uninitialized data region. 
- for (Phdr &P : Phdrs) { - Elf_Phdr &H = P.H; - if (H.p_type != PT_LOAD) + for (PhdrEntry &P : Phdrs) { + if (P.p_type != PT_LOAD) continue; - Set(ElfSym<ELFT>::End, ElfSym<ELFT>::End2, H.p_vaddr + H.p_memsz); + Set(ElfSym<ELFT>::End, ElfSym<ELFT>::End2, P.p_vaddr + P.p_memsz); - uintX_t Val = H.p_vaddr + H.p_filesz; - if (H.p_flags & PF_W) + uintX_t Val = P.p_vaddr + P.p_filesz; + if (P.p_flags & PF_W) Set(ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2, Val); else Set(ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2, Val); } + + // Setup MIPS _gp_disp/__gnu_local_gp symbols which should + // be equal to the _gp symbol's value. + if (Config->EMachine == EM_MIPS) { + if (!ElfSym<ELFT>::MipsGp->Value) { + // Find GP-relative section with the lowest address + // and use this address to calculate default _gp value. + uintX_t Gp = -1; + for (const OutputSectionBase * OS : OutputSections) + if ((OS->Flags & SHF_MIPS_GPREL) && OS->Addr < Gp) + Gp = OS->Addr; + if (Gp != (uintX_t)-1) + ElfSym<ELFT>::MipsGp->Value = Gp + 0x7ff0; + } + if (ElfSym<ELFT>::MipsGpDisp) + ElfSym<ELFT>::MipsGpDisp->Value = ElfSym<ELFT>::MipsGp->Value; + if (ElfSym<ELFT>::MipsLocalGp) + ElfSym<ELFT>::MipsLocalGp->Value = ElfSym<ELFT>::MipsGp->Value; + } } template <class ELFT> void Writer<ELFT>::writeHeader() { uint8_t *Buf = Buffer->getBufferStart(); memcpy(Buf, "\177ELF", 4); - auto &FirstObj = cast<ELFFileBase<ELFT>>(*Config->FirstElf); - // Write the ELF header. auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Buf); EHdr->e_ident[EI_CLASS] = ELFT::Is64Bits ? 
ELFCLASS64 : ELFCLASS32; EHdr->e_ident[EI_DATA] = getELFEncoding<ELFT>(); EHdr->e_ident[EI_VERSION] = EV_CURRENT; - EHdr->e_ident[EI_OSABI] = FirstObj.getOSABI(); + EHdr->e_ident[EI_OSABI] = Config->OSABI; EHdr->e_type = getELFType(); - EHdr->e_machine = FirstObj.EMachine; + EHdr->e_machine = Config->EMachine; EHdr->e_version = EV_CURRENT; - EHdr->e_entry = getEntryAddr<ELFT>(); + EHdr->e_entry = getEntryAddr(); EHdr->e_shoff = SectionHeaderOff; EHdr->e_ehsize = sizeof(Elf_Ehdr); EHdr->e_phnum = Phdrs.size(); EHdr->e_shentsize = sizeof(Elf_Shdr); EHdr->e_shnum = OutputSections.size() + 1; - EHdr->e_shstrndx = Out<ELFT>::ShStrTab->SectionIndex; + EHdr->e_shstrndx = In<ELFT>::ShStrTab->OutSec->SectionIndex; - if (Config->EMachine == EM_MIPS) - EHdr->e_flags = getMipsEFlags(ELFT::Is64Bits); + if (Config->EMachine == EM_ARM) + // We don't currently use any features incompatible with EF_ARM_EABI_VER5, + // but we don't have any firm guarantees of conformance. Linux AArch64 + // kernels (as of 2016) require an EABI version to be set. + EHdr->e_flags = EF_ARM_EABI_VER5; + else if (Config->EMachine == EM_MIPS) + EHdr->e_flags = getMipsEFlags<ELFT>(); if (!Config->Relocatable) { EHdr->e_phoff = sizeof(Elf_Ehdr); @@ -1221,63 +1591,131 @@ template <class ELFT> void Writer<ELFT>::writeHeader() { // Write the program header table. auto *HBuf = reinterpret_cast<Elf_Phdr *>(Buf + EHdr->e_phoff); - for (Phdr &P : Phdrs) - *HBuf++ = P.H; + for (PhdrEntry &P : Phdrs) { + HBuf->p_type = P.p_type; + HBuf->p_flags = P.p_flags; + HBuf->p_offset = P.p_offset; + HBuf->p_vaddr = P.p_vaddr; + HBuf->p_paddr = P.p_paddr; + HBuf->p_filesz = P.p_filesz; + HBuf->p_memsz = P.p_memsz; + HBuf->p_align = P.p_align; + ++HBuf; + } // Write the section header table. Note that the first table entry is null. 
auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff); - for (OutputSectionBase<ELFT> *Sec : OutputSections) - Sec->writeHeaderTo(++SHdrs); + for (OutputSectionBase *Sec : OutputSections) + Sec->writeHeaderTo<ELFT>(++SHdrs); +} + +// Removes a given file asynchronously. This is a performance hack, +// so remove this when operating systems are improved. +// +// On Linux (and probably on other Unix-like systems), unlink(2) is a +// noticeably slow system call. As of 2016, unlink takes 250 +// milliseconds to remove a 1 GB file on ext4 filesystem on my machine. +// +// To create a new result file, we first remove existing file. So, if +// you repeatedly link a 1 GB program in a regular compile-link-debug +// cycle, every cycle wastes 250 milliseconds only to remove a file. +// Since LLD can link a 1 GB binary in about 5 seconds, that waste +// actually counts. +// +// This function spawns a background thread to call unlink. +// The calling thread returns almost immediately. +static void unlinkAsync(StringRef Path) { + if (!Config->Threads || !sys::fs::exists(Config->OutputFile)) + return; + + // First, rename Path to avoid race condition. We cannot remove + // Path from a different thread because we are now going to create + // Path as a new file. If we do that in a different thread, the new + // thread can remove the new file. + SmallString<128> TempPath; + if (auto EC = sys::fs::createUniqueFile(Path + "tmp%%%%%%%%", TempPath)) + fatal(EC, "createUniqueFile failed"); + if (auto EC = sys::fs::rename(Path, TempPath)) + fatal(EC, "rename failed"); + + // Remove TempPath in background. + std::thread([=] { ::remove(TempPath.str().str().c_str()); }).detach(); } +// Open a result file. 
template <class ELFT> void Writer<ELFT>::openFile() { + unlinkAsync(Config->OutputFile); ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = FileOutputBuffer::create(Config->OutputFile, FileSize, FileOutputBuffer::F_executable); + if (auto EC = BufferOrErr.getError()) error(EC, "failed to open " + Config->OutputFile); else Buffer = std::move(*BufferOrErr); } +template <class ELFT> void Writer<ELFT>::writeSectionsBinary() { + uint8_t *Buf = Buffer->getBufferStart(); + for (OutputSectionBase *Sec : OutputSections) + if (Sec->Flags & SHF_ALLOC) + Sec->writeTo(Buf + Sec->Offset); +} + // Write section contents to a mmap'ed file. template <class ELFT> void Writer<ELFT>::writeSections() { uint8_t *Buf = Buffer->getBufferStart(); - // PPC64 needs to process relocations in the .opd section before processing - // relocations in code-containing sections. - if (OutputSectionBase<ELFT> *Sec = Out<ELFT>::Opd) { - Out<ELFT>::OpdBuf = Buf + Sec->getFileOff(); - Sec->writeTo(Buf + Sec->getFileOff()); + // PPC64 needs to process relocations in the .opd section + // before processing relocations in code-containing sections. + Out<ELFT>::Opd = findSection(".opd"); + if (Out<ELFT>::Opd) { + Out<ELFT>::OpdBuf = Buf + Out<ELFT>::Opd->Offset; + Out<ELFT>::Opd->writeTo(Buf + Out<ELFT>::Opd->Offset); } - for (OutputSectionBase<ELFT> *Sec : OutputSections) - if (Sec != Out<ELFT>::Opd) - Sec->writeTo(Buf + Sec->getFileOff()); + OutputSectionBase *EhFrameHdr = + In<ELFT>::EhFrameHdr ? In<ELFT>::EhFrameHdr->OutSec : nullptr; + for (OutputSectionBase *Sec : OutputSections) + if (Sec != Out<ELFT>::Opd && Sec != EhFrameHdr) + Sec->writeTo(Buf + Sec->Offset); + + // The .eh_frame_hdr depends on .eh_frame section contents, therefore + // it should be written after .eh_frame is written. 
+ if (!Out<ELFT>::EhFrame->empty() && EhFrameHdr) + EhFrameHdr->writeTo(Buf + EhFrameHdr->Offset); } template <class ELFT> void Writer<ELFT>::writeBuildId() { - if (!Out<ELFT>::BuildId) + if (!In<ELFT>::BuildId || !In<ELFT>::BuildId->OutSec) return; - // Compute a hash of all sections except .debug_* sections. - // We skip debug sections because they tend to be very large - // and their contents are very likely to be the same as long as - // other sections are the same. + // Compute a hash of all sections of the output file. uint8_t *Start = Buffer->getBufferStart(); - uint8_t *Last = Start; - std::vector<ArrayRef<uint8_t>> Regions; - for (OutputSectionBase<ELFT> *Sec : OutputSections) { - uint8_t *End = Start + Sec->getFileOff(); - if (!Sec->getName().startswith(".debug_")) - Regions.push_back({Last, End}); - Last = End; - } - Regions.push_back({Last, Start + FileSize}); - Out<ELFT>::BuildId->writeBuildId(Regions); + uint8_t *End = Start + FileSize; + In<ELFT>::BuildId->writeBuildId({Start, End}); } -template void elf::writeResult<ELF32LE>(SymbolTable<ELF32LE> *Symtab); -template void elf::writeResult<ELF32BE>(SymbolTable<ELF32BE> *Symtab); -template void elf::writeResult<ELF64LE>(SymbolTable<ELF64LE> *Symtab); -template void elf::writeResult<ELF64BE>(SymbolTable<ELF64BE> *Symtab); +template void elf::writeResult<ELF32LE>(); +template void elf::writeResult<ELF32BE>(); +template void elf::writeResult<ELF64LE>(); +template void elf::writeResult<ELF64BE>(); + +template void elf::allocateHeaders<ELF32LE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF32BE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF64LE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); +template void elf::allocateHeaders<ELF64BE>(MutableArrayRef<PhdrEntry>, + ArrayRef<OutputSectionBase *>); + +template bool elf::isRelroSection<ELF32LE>(const OutputSectionBase *); +template bool 
elf::isRelroSection<ELF32BE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF64LE>(const OutputSectionBase *); +template bool elf::isRelroSection<ELF64BE>(const OutputSectionBase *); + +template void elf::reportDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *); +template void elf::reportDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *); +template void elf::reportDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *); +template void elf::reportDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *); diff --git a/contrib/llvm/tools/lld/ELF/Writer.h b/contrib/llvm/tools/lld/ELF/Writer.h index df25d8e404c2..718e3139a809 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.h +++ b/contrib/llvm/tools/lld/ELF/Writer.h @@ -10,28 +10,56 @@ #ifndef LLD_ELF_WRITER_H #define LLD_ELF_WRITER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include <cstdint> #include <memory> -namespace llvm { - class StringRef; -} - namespace lld { namespace elf { +class InputFile; +class OutputSectionBase; template <class ELFT> class InputSectionBase; template <class ELFT> class ObjectFile; template <class ELFT> class SymbolTable; +template <class ELFT> void writeResult(); +template <class ELFT> void markLive(); +template <class ELFT> bool isRelroSection(const OutputSectionBase *Sec); -template <class ELFT> void writeResult(SymbolTable<ELFT> *Symtab); +// This describes a program header entry. +// Each contains type, access flags and range of output sections that will be +// placed in it. 
+struct PhdrEntry { + PhdrEntry(unsigned Type, unsigned Flags); + void add(OutputSectionBase *Sec); -template <class ELFT> void markLive(); + uint64_t p_paddr = 0; + uint64_t p_vaddr = 0; + uint64_t p_align = 0; + uint64_t p_memsz = 0; + uint64_t p_filesz = 0; + uint64_t p_offset = 0; + uint32_t p_type = 0; + uint32_t p_flags = 0; -template <class ELFT> -llvm::StringRef getOutputSectionName(InputSectionBase<ELFT> *S); + OutputSectionBase *First = nullptr; + OutputSectionBase *Last = nullptr; + bool HasLMA = false; +}; + +llvm::StringRef getOutputSectionName(llvm::StringRef Name); template <class ELFT> -void reportDiscarded(InputSectionBase<ELFT> *IS, - const std::unique_ptr<elf::ObjectFile<ELFT>> &File); +void allocateHeaders(llvm::MutableArrayRef<PhdrEntry>, + llvm::ArrayRef<OutputSectionBase *>); +template <class ELFT> void reportDiscarded(InputSectionBase<ELFT> *IS); + +template <class ELFT> uint32_t getMipsEFlags(); + +uint8_t getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, + llvm::StringRef FileName); + +bool isMipsN32Abi(const InputFile *F); } } diff --git a/contrib/llvm/tools/lld/include/lld/Config/Version.h b/contrib/llvm/tools/lld/include/lld/Config/Version.h index 41433c1175ef..1cec3cc7678c 100644 --- a/contrib/llvm/tools/lld/include/lld/Config/Version.h +++ b/contrib/llvm/tools/lld/include/lld/Config/Version.h @@ -6,11 +6,9 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Defines version macros and version-related utility functions -/// for lld. -/// +// +// Defines a version-related utility function. +// //===----------------------------------------------------------------------===// #ifndef LLD_VERSION_H @@ -18,34 +16,10 @@ #include "lld/Config/Version.inc" #include "llvm/ADT/StringRef.h" -#include <string> - -/// \brief Helper macro for LLD_VERSION_STRING. 
-#define LLD_MAKE_VERSION_STRING2(X) #X - -/// \brief Helper macro for LLD_VERSION_STRING. -#define LLD_MAKE_VERSION_STRING(X, Y) LLD_MAKE_VERSION_STRING2(X.Y) - -/// \brief A string that describes the lld version number, e.g., "1.0". -#define LLD_VERSION_STRING \ - LLD_MAKE_VERSION_STRING(LLD_VERSION_MAJOR, LLD_VERSION_MINOR) namespace lld { -/// \brief Retrieves the repository path (e.g., Subversion path) that -/// identifies the particular lld branch, tag, or trunk from which this -/// lld was built. -llvm::StringRef getLLDRepositoryPath(); - -/// \brief Retrieves the repository revision number (or identifer) from which -/// this lld was built. -llvm::StringRef getLLDRevision(); - -/// \brief Retrieves the full repository version that is an amalgamation of -/// the information in getLLDRepositoryPath() and getLLDRevision(). -std::string getLLDRepositoryVersion(); - /// \brief Retrieves a string representing the complete lld version. -llvm::StringRef getLLDVersion(); +std::string getLLDVersion(); } #endif // LLD_VERSION_H diff --git a/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in b/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in index c893a56686c0..2789a5c46089 100644 --- a/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in +++ b/contrib/llvm/tools/lld/include/lld/Config/Version.inc.in @@ -1,4 +1,5 @@ #define LLD_VERSION @LLD_VERSION@ +#define LLD_VERSION_STRING "@LLD_VERSION@" #define LLD_VERSION_MAJOR @LLD_VERSION_MAJOR@ #define LLD_VERSION_MINOR @LLD_VERSION_MINOR@ #define LLD_REVISION_STRING "@LLD_REVISION@" diff --git a/contrib/llvm/tools/lld/include/lld/Core/Atom.h b/contrib/llvm/tools/lld/include/lld/Core/Atom.h index 42ca2bb8af8c..156a5d4a736f 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Atom.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Atom.h @@ -1,4 +1,4 @@ -//===- Core/Atom.h - A node in linking graph ------------------------------===// +//===- Core/Atom.h - A node in linking graph --------------------*- C++ 
-*-===// // // The LLVM Linker // @@ -11,6 +11,7 @@ #define LLD_CORE_ATOM_H #include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" namespace lld { @@ -28,6 +29,7 @@ class OwningAtomPtr; /// class Atom { template<typename T> friend class OwningAtomPtr; + public: /// Whether this atom is defined or a proxy for an undefined symbol enum Definition { @@ -47,7 +49,6 @@ public: /// loader (e.g. visibility=default). }; - /// file - returns the File that produced/owns this Atom virtual const File& file() const = 0; @@ -69,7 +70,7 @@ protected: /// object. Therefore, no one but the owning File object should call /// delete on an Atom. In fact, some File objects may bulk allocate /// an array of Atoms, so they cannot be individually deleted by anyone. - virtual ~Atom() {} + virtual ~Atom() = default; private: Definition _definition; @@ -81,9 +82,10 @@ template<typename T> class OwningAtomPtr { private: OwningAtomPtr(const OwningAtomPtr &) = delete; - void operator=(const OwningAtomPtr&) = delete; + void operator=(const OwningAtomPtr &) = delete; + public: - OwningAtomPtr() : atom(nullptr) { } + OwningAtomPtr() = default; OwningAtomPtr(T *atom) : atom(atom) { } ~OwningAtomPtr() { @@ -121,9 +123,9 @@ public: } private: - T *atom; + T *atom = nullptr; }; -} // namespace lld +} // end namespace lld #endif // LLD_CORE_ATOM_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h b/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h index e3193f8aaf2e..7f623d2ea5e6 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h +++ b/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h @@ -354,10 +354,6 @@ public: return atomContentType == typeCFI; } - // Returns true if lhs should be placed before rhs in the final output. - static bool compareByPosition(const DefinedAtom *lhs, - const DefinedAtom *rhs); - protected: // DefinedAtom is an abstract base class. Only subclasses can access // constructor. 
diff --git a/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h b/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h index 7e4edaf22cf3..b3a999b00fbd 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h +++ b/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h @@ -1,4 +1,4 @@ -//===- lld/Core/LinkingContext.h - Linker Target Info Interface -----------===// +//===- lld/Core/LinkingContext.h - Linker Target Info Interface -*- C++ -*-===// // // The LLVM Linker // @@ -10,17 +10,21 @@ #ifndef LLD_CORE_LINKING_CONTEXT_H #define LLD_CORE_LINKING_CONTEXT_H -#include "lld/Core/Error.h" -#include "lld/Core/LLVM.h" #include "lld/Core/Node.h" -#include "lld/Core/Reference.h" #include "lld/Core/Reader.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <memory> #include <string> #include <vector> namespace lld { + class PassManager; class File; class Writer; @@ -117,12 +121,15 @@ public: void setDeadStripping(bool enable) { _deadStrip = enable; } void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; } + void setPrintRemainingUndefines(bool print) { _printRemainingUndefines = print; } + void setAllowRemainingUndefines(bool allow) { _allowRemainingUndefines = allow; } + void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; } void setLogInputFiles(bool log) { _logInputFiles = log; } @@ -149,7 +156,7 @@ public: /// during link. Flavors can override this function in their LinkingContext /// to add more internal files. These internal files are positioned before /// the actual input files. 
- virtual void createInternalFiles(std::vector<std::unique_ptr<File> > &) const; + virtual void createInternalFiles(std::vector<std::unique_ptr<File>> &) const; /// Return the list of undefined symbols that are specified in the /// linker command line, using the -u option. @@ -248,4 +255,4 @@ private: } // end namespace lld -#endif +#endif // LLD_CORE_LINKING_CONTEXT_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Node.h b/contrib/llvm/tools/lld/include/lld/Core/Node.h index 8de0ecdbba6a..c30482409e7a 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Node.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Node.h @@ -1,4 +1,4 @@ -//===- lld/Core/Node.h - Input file class ---------------------------------===// +//===- lld/Core/Node.h - Input file class -----------------------*- C++ -*-===// // // The LLVM Linker // @@ -17,9 +17,8 @@ #define LLD_CORE_NODE_H #include "lld/Core/File.h" -#include "llvm/Option/ArgList.h" +#include <algorithm> #include <memory> -#include <vector> namespace lld { @@ -29,8 +28,10 @@ namespace lld { class Node { public: enum class Kind { File, GroupEnd }; + explicit Node(Kind type) : _kind(type) {} - virtual ~Node() {} + virtual ~Node() = default; + virtual Kind kind() const { return _kind; } private: @@ -69,6 +70,6 @@ protected: std::unique_ptr<File> _file; }; -} // namespace lld +} // end namespace lld #endif // LLD_CORE_NODE_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Parallel.h b/contrib/llvm/tools/lld/include/lld/Core/Parallel.h index 2dde97d9e3f0..f241453a4d39 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Parallel.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Parallel.h @@ -121,7 +121,7 @@ public: // Spawn all but one of the threads in another thread as spawning threads // can take a while. 
std::thread([&, threadCount] { - for (std::size_t i = 1; i < threadCount; ++i) { + for (size_t i = 1; i < threadCount; ++i) { std::thread([=] { work(); }).detach(); @@ -270,26 +270,65 @@ template <class T> void parallel_sort(T *start, T *end) { } #if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 -template <class Iterator, class Func> -void parallel_for_each(Iterator begin, Iterator end, Func func) { - std::for_each(begin, end, func); +template <class IterTy, class FuncTy> +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + std::for_each(Begin, End, Fn); +} + +template <class IndexTy, class FuncTy> +void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { + for (IndexTy I = Begin; I != End; ++I) + Fn(I); } #elif defined(_MSC_VER) // Use ppl parallel_for_each on Windows. -template <class Iterator, class Func> -void parallel_for_each(Iterator begin, Iterator end, Func func) { - concurrency::parallel_for_each(begin, end, func); +template <class IterTy, class FuncTy> +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + concurrency::parallel_for_each(Begin, End, Fn); +} + +template <class IndexTy, class FuncTy> +void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { + concurrency::parallel_for(Begin, End, Fn); } #else -template <class Iterator, class Func> -void parallel_for_each(Iterator begin, Iterator end, Func func) { - TaskGroup tg; - ptrdiff_t taskSize = 1024; - while (taskSize <= std::distance(begin, end)) { - tg.spawn([=, &func] { std::for_each(begin, begin + taskSize, func); }); - begin += taskSize; +template <class IterTy, class FuncTy> +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + // TaskGroup has a relatively high overhead, so we want to reduce + // the number of spawn() calls. We'll create up to 1024 tasks here. + // (Note that 1024 is an arbitrary number. This code probably needs + // improving to take the number of available cores into account.) 
+ ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup Tg; + while (TaskSize <= std::distance(Begin, End)) { + Tg.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); + Begin += TaskSize; } - std::for_each(begin, end, func); + Tg.spawn([=, &Fn] { std::for_each(Begin, End, Fn); }); +} + +template <class IndexTy, class FuncTy> +void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { + ptrdiff_t TaskSize = (End - Begin) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup Tg; + IndexTy I = Begin; + for (; I < End; I += TaskSize) { + Tg.spawn([=, &Fn] { + for (IndexTy J = I, E = I + TaskSize; J != E; ++J) + Fn(J); + }); + Begin += TaskSize; + } + Tg.spawn([=, &Fn] { + for (IndexTy J = I; J < End; ++J) + Fn(J); + }); } #endif } // end namespace lld diff --git a/contrib/llvm/tools/lld/include/lld/Core/Pass.h b/contrib/llvm/tools/lld/include/lld/Core/Pass.h index 0527f02cd362..bfe3f9b10e0c 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Pass.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Pass.h @@ -1,4 +1,4 @@ -//===------ Core/Pass.h - Base class for linker passes --------------------===// +//===------ Core/Pass.h - Base class for linker passes ----------*- C++ -*-===// // // The LLVM Linker // @@ -10,13 +10,10 @@ #ifndef LLD_CORE_PASS_H #define LLD_CORE_PASS_H -#include "lld/Core/Atom.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" #include "llvm/Support/Error.h" -#include <vector> namespace lld { + class SimpleFile; /// Once the core linking is done (which resolves references, coalesces atoms @@ -31,16 +28,16 @@ class SimpleFile; /// new Atoms to the graph using the File's addAtom() method. class Pass { public: - virtual ~Pass() { } + virtual ~Pass() = default; /// Do the actual work of the Pass. virtual llvm::Error perform(SimpleFile &mergedFile) = 0; protected: // Only subclassess can be instantiated. 
- Pass() { } + Pass() = default; }; -} // namespace lld +} // end namespace lld #endif // LLD_CORE_PASS_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/PassManager.h b/contrib/llvm/tools/lld/include/lld/Core/PassManager.h index 71a25cc7f3cd..09b417a2985d 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/PassManager.h +++ b/contrib/llvm/tools/lld/include/lld/Core/PassManager.h @@ -36,7 +36,7 @@ public: for (std::unique_ptr<Pass> &pass : _passes) if (llvm::Error EC = pass->perform(file)) return EC; - return llvm::Error(); + return llvm::Error::success(); } private: diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reader.h b/contrib/llvm/tools/lld/include/lld/Core/Reader.h index 66df4380dc76..5105eb1aa2be 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Reader.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Reader.h @@ -12,9 +12,10 @@ #include "lld/Core/LLVM.h" #include "lld/Core/Reference.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/YAMLTraits.h" -#include <functional> +#include "llvm/Support/MemoryBuffer.h" #include <memory> #include <vector> @@ -23,10 +24,11 @@ using llvm::sys::fs::file_magic; namespace llvm { namespace yaml { class IO; -} -} +} // end namespace yaml +} // end namespace llvm namespace lld { + class File; class LinkingContext; class MachOLinkingContext; @@ -37,7 +39,7 @@ class MachOLinkingContext; /// Each file format (e.g. mach-o, etc) has a concrete subclass of Reader. class Reader { public: - virtual ~Reader() {} + virtual ~Reader() = default; /// Sniffs the file to determine if this Reader can parse it. /// The method is called with: @@ -52,7 +54,6 @@ public: loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &) const = 0; }; - /// \brief An abstract class for handling alternate yaml representations /// of object files. 
/// @@ -74,7 +75,6 @@ public: virtual bool handledDocTag(llvm::yaml::IO &io, const lld::File *&f) const = 0; }; - /// A registry to hold the list of currently registered Readers and /// tables which map Reference kind values to strings. /// The linker does not directly invoke Readers. Instead, it registers @@ -127,7 +127,6 @@ public: void addKindTable(Reference::KindNamespace ns, Reference::KindArch arch, const KindStrings array[]); - private: struct KindEntry { Reference::KindNamespace ns; @@ -154,4 +153,4 @@ private: } // end namespace lld -#endif +#endif // LLD_CORE_READER_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reference.h b/contrib/llvm/tools/lld/include/lld/Core/Reference.h index 86de4f6a4236..1d3003c84616 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Reference.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Reference.h @@ -1,4 +1,4 @@ -//===- Core/References.h - A Reference to Another Atom --------------------===// +//===- Core/References.h - A Reference to Another Atom ----------*- C++ -*-===// // // The LLVM Linker // @@ -10,10 +10,10 @@ #ifndef LLD_CORE_REFERENCES_H #define LLD_CORE_REFERENCES_H -#include "lld/Core/LLVM.h" -#include "llvm/ADT/StringSwitch.h" +#include <cstdint> namespace lld { + class Atom; /// @@ -107,13 +107,13 @@ protected: /// object. Therefore, no one but the owning File object should call /// delete on an Reference. In fact, some File objects may bulk allocate /// an array of References, so they cannot be individually deleted by anyone. 
- virtual ~Reference() {} + virtual ~Reference() = default; KindValue _kindValue; uint8_t _kindNamespace; uint8_t _kindArch; }; -} // namespace lld +} // end namespace lld #endif // LLD_CORE_REFERENCES_H diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reproduce.h b/contrib/llvm/tools/lld/include/lld/Core/Reproduce.h new file mode 100644 index 000000000000..cf2747493834 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Core/Reproduce.h @@ -0,0 +1,73 @@ +//===- Reproduce.h - Utilities for creating reproducers ---------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_REPRODUCE_H +#define LLD_CORE_REPRODUCE_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Error.h" + +namespace llvm { + +class raw_fd_ostream; + +namespace opt { class Arg; } + +} + +namespace lld { + +// This class creates a .cpio file for --reproduce (ELF) or /linkrepro (COFF). +// +// If "--reproduce foo" is given, we create a file "foo.cpio" and +// copy all input files to the archive, along with a response file +// to re-run the same command with the same inputs. +// It is useful for reporting issues to LLD developers. +// +// Cpio as a file format is a deliberate choice. It's standardized in +// POSIX and very easy to create. cpio command is available virtually +// on all Unix systems. See +// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07 +// for the format details. 
+class CpioFile { +public: + static ErrorOr<CpioFile *> create(StringRef OutputPath); + void append(StringRef Path, StringRef Data); + +private: + CpioFile(std::unique_ptr<llvm::raw_fd_ostream> OS, StringRef Basename); + + std::unique_ptr<llvm::raw_fd_ostream> OS; + llvm::StringSet<> Seen; + std::string Basename; +}; + +// Makes a given pathname an absolute path first, and then remove +// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", +// assuming that the current directory is "/home/john/bar". +std::string relativeToRoot(StringRef Path); + +// Quote a given string if it contains a space character. +std::string quote(StringRef S); + +// Rewrite the given path if a file exists with that pathname, otherwise +// returns the original path. +std::string rewritePath(StringRef S); + +// Returns the string form of the given argument. +std::string stringize(llvm::opt::Arg *Arg); + +// Replaces backslashes with slashes if Windows. +std::string convertToUnixPathSeparator(StringRef S); +} + +#endif diff --git a/contrib/llvm/tools/lld/include/lld/Core/Simple.h b/contrib/llvm/tools/lld/include/lld/Core/Simple.h index f75b40327db4..3aa7abf5d12b 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/Simple.h +++ b/contrib/llvm/tools/lld/include/lld/Core/Simple.h @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -111,19 +112,17 @@ private: AtomVector<AbsoluteAtom> _absolute; }; -class SimpleReference : public Reference { +class SimpleReference : public Reference, + public llvm::ilist_node<SimpleReference> { public: SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch, Reference::KindValue value, uint64_t off, const Atom *t, Reference::Addend a) - : Reference(ns, arch, value), _target(t), _offsetInAtom(off), _addend(a), - _next(nullptr), 
_prev(nullptr) { + : Reference(ns, arch, value), _target(t), _offsetInAtom(off), _addend(a) { } SimpleReference() : Reference(Reference::KindNamespace::all, Reference::KindArch::all, 0), - _target(nullptr), _offsetInAtom(0), _addend(0), _next(nullptr), - _prev(nullptr) { - } + _target(nullptr), _offsetInAtom(0), _addend(0) {} uint64_t offsetInAtom() const override { return _offsetInAtom; } @@ -135,72 +134,17 @@ public: Addend addend() const override { return _addend; } void setAddend(Addend a) override { _addend = a; } void setTarget(const Atom *newAtom) override { _target = newAtom; } - SimpleReference *getNext() const { return _next; } - SimpleReference *getPrev() const { return _prev; } - void setNext(SimpleReference *n) { _next = n; } - void setPrev(SimpleReference *p) { _prev = p; } private: const Atom *_target; uint64_t _offsetInAtom; Addend _addend; - SimpleReference *_next; - SimpleReference *_prev; -}; - -} // end namespace lld - -// ilist will lazily create a sentinal (so end() can return a node past the -// end of the list). We need this trait so that the sentinal is allocated -// via the BumpPtrAllocator. 
-namespace llvm { - -template<> -struct ilist_sentinel_traits<lld::SimpleReference> { - - ilist_sentinel_traits() : _allocator(nullptr) { } - - void setAllocator(llvm::BumpPtrAllocator *alloc) { - _allocator = alloc; - } - - lld::SimpleReference *createSentinel() const { - return new (*_allocator) lld::SimpleReference(); - } - - static void destroySentinel(lld::SimpleReference*) {} - - static lld::SimpleReference *provideInitialHead() { return nullptr; } - - lld::SimpleReference *ensureHead(lld::SimpleReference *&head) const { - if (!head) { - head = createSentinel(); - noteHead(head, head); - ilist_traits<lld::SimpleReference>::setNext(head, nullptr); - return head; - } - return ilist_traits<lld::SimpleReference>::getPrev(head); - } - - void noteHead(lld::SimpleReference *newHead, - lld::SimpleReference *sentinel) const { - ilist_traits<lld::SimpleReference>::setPrev(newHead, sentinel); - } - -private: - mutable llvm::BumpPtrAllocator *_allocator; }; -} // end namespace llvm - -namespace lld { - class SimpleDefinedAtom : public DefinedAtom { public: explicit SimpleDefinedAtom(const File &f) - : _file(f), _ordinal(f.getNextAtomOrdinalAndIncrement()) { - _references.setAllocator(&f.allocator()); - } + : _file(f), _ordinal(f.getNextAtomOrdinalAndIncrement()) {} ~SimpleDefinedAtom() override { _references.clearAndLeakNodesUnsafely(); @@ -232,23 +176,26 @@ public: } DefinedAtom::reference_iterator begin() const override { - const void *it = reinterpret_cast<const void *>(&*_references.begin()); + const void *it = + reinterpret_cast<const void *>(_references.begin().getNodePtr()); return reference_iterator(*this, it); } DefinedAtom::reference_iterator end() const override { - const void *it = reinterpret_cast<const void *>(&*_references.end()); + const void *it = + reinterpret_cast<const void *>(_references.end().getNodePtr()); return reference_iterator(*this, it); } const Reference *derefIterator(const void *it) const override { - return reinterpret_cast<const 
Reference*>(it); + return &*RefList::const_iterator( + *reinterpret_cast<const llvm::ilist_node<SimpleReference> *>(it)); } void incrementIterator(const void *&it) const override { - const SimpleReference* node = reinterpret_cast<const SimpleReference*>(it); - const SimpleReference* next = node->getNext(); - it = reinterpret_cast<const void*>(next); + RefList::const_iterator ref( + *reinterpret_cast<const llvm::ilist_node<SimpleReference> *>(it)); + it = reinterpret_cast<const void *>(std::next(ref).getNodePtr()); } void addReference(Reference::KindNamespace ns, diff --git a/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h b/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h index db610ad14066..ba4951e5bd13 100644 --- a/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h +++ b/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h @@ -46,22 +46,12 @@ public: /// @brief add atom to symbol table bool add(const AbsoluteAtom &); - /// @brief checks if name is in symbol table and if so atom is not - /// UndefinedAtom - bool isDefined(StringRef sym); - /// @brief returns atom in symbol table for specified name (or nullptr) const Atom *findByName(StringRef sym); /// @brief returns vector of remaining UndefinedAtoms std::vector<const UndefinedAtom *> undefines(); - /// returns vector of tentative definitions - std::vector<StringRef> tentativeDefinitions(); - - /// @brief add atom to replacement table - void addReplacement(const Atom *replaced, const Atom *replacement); - /// @brief if atom has been coalesced away, return replacement, else return atom const Atom *replacement(const Atom *); diff --git a/contrib/llvm/tools/lld/include/lld/Driver/Driver.h b/contrib/llvm/tools/lld/include/lld/Driver/Driver.h index 312f4f812b77..a3265c85716a 100644 --- a/contrib/llvm/tools/lld/include/lld/Driver/Driver.h +++ b/contrib/llvm/tools/lld/include/lld/Driver/Driver.h @@ -19,7 +19,7 @@ bool link(llvm::ArrayRef<const char *> Args); } namespace elf { -bool 
link(llvm::ArrayRef<const char *> Args, +bool link(llvm::ArrayRef<const char *> Args, bool CanExitEarly, llvm::raw_ostream &Diag = llvm::errs()); } diff --git a/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h b/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h index 7b673f0dad3e..a9e80f50b23d 100644 --- a/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h +++ b/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h @@ -377,6 +377,10 @@ public: uint32_t dylibCompatVersion(StringRef installName) const; + ArrayRef<mach_o::MachODylibFile*> allDylibs() const { + return _allDylibs; + } + /// Creates a copy (owned by this MachOLinkingContext) of a string. StringRef copy(StringRef str) { return str.copy(_allocator); } @@ -485,7 +489,7 @@ private: mutable std::unique_ptr<Writer> _writer; std::vector<SectionAlign> _sectAligns; mutable llvm::StringMap<mach_o::MachODylibFile*> _pathToDylibMap; - mutable std::set<mach_o::MachODylibFile*> _allDylibs; + mutable std::vector<mach_o::MachODylibFile*> _allDylibs; mutable std::set<mach_o::MachODylibFile*> _upwardDylibs; mutable std::vector<std::unique_ptr<File>> _indirectDylibs; mutable std::mutex _dylibsMutex; diff --git a/contrib/llvm/tools/lld/include/lld/Support/Memory.h b/contrib/llvm/tools/lld/include/lld/Support/Memory.h new file mode 100644 index 000000000000..46db4a39f696 --- /dev/null +++ b/contrib/llvm/tools/lld/include/lld/Support/Memory.h @@ -0,0 +1,63 @@ +//===- Memory.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines arena allocators. +// +// Almost all large objects, such as files, sections or symbols, are +// used for the entire lifetime of the linker once they are created. 
+// This usage characteristic makes arena allocator an attractive choice +// where the entire linker is one arena. With an arena, newly created +// objects belong to the arena and freed all at once when everything is done. +// Arena allocators are efficient and easy to understand. +// Most objects are allocated using the arena allocators defined by this file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MEMORY_H +#define LLD_MEMORY_H + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" +#include <vector> + +namespace lld { + +// Use this arena if your object doesn't have a destructor. +extern llvm::BumpPtrAllocator BAlloc; +extern llvm::StringSaver Saver; + +// These two classes are hack to keep track of all +// SpecificBumpPtrAllocator instances. +struct SpecificAllocBase { + SpecificAllocBase() { Instances.push_back(this); } + virtual ~SpecificAllocBase() = default; + virtual void reset() = 0; + static std::vector<SpecificAllocBase *> Instances; +}; + +template <class T> struct SpecificAlloc : public SpecificAllocBase { + void reset() override { Alloc.DestroyAll(); } + llvm::SpecificBumpPtrAllocator<T> Alloc; +}; + +// Use this arena if your object has a destructor. +// Your destructor will be invoked from freeArena(). +template <typename T, typename... U> inline T *make(U &&... 
Args) { + static SpecificAlloc<T> Alloc; + return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...); +} + +inline void freeArena() { + for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances) + Alloc->reset(); + BAlloc.Reset(); +} +} + +#endif diff --git a/contrib/llvm/tools/lld/lib/Config/Version.cpp b/contrib/llvm/tools/lld/lib/Config/Version.cpp index 60687b9d8940..25544756f8be 100644 --- a/contrib/llvm/tools/lld/lib/Config/Version.cpp +++ b/contrib/llvm/tools/lld/lib/Config/Version.cpp @@ -12,46 +12,32 @@ //===----------------------------------------------------------------------===// #include "lld/Config/Version.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace lld { - -StringRef getLLDRepositoryPath() { -#ifdef LLD_REPOSITORY_STRING - return LLD_REPOSITORY_STRING; -#else - return ""; -#endif +// Returns an SVN repository path, which is usually "trunk". +static std::string getRepositoryPath() { + StringRef S = LLD_REPOSITORY_STRING; + size_t Pos = S.find("lld/"); + if (Pos != StringRef::npos) + return S.substr(Pos + 4); + return S; } -StringRef getLLDRevision() { -#ifdef LLD_REVISION_STRING - return LLD_REVISION_STRING; -#else - return ""; -#endif -} +// Returns an SVN repository name, e.g., " (trunk 284614)" +// or an empty string if no repository info is available. 
+static std::string getRepository() { + std::string Repo = getRepositoryPath(); + std::string Rev = LLD_REVISION_STRING; -std::string getLLDRepositoryVersion() { - std::string S = getLLDRepositoryPath(); - std::string T = getLLDRevision(); - if (S.empty() && T.empty()) + if (Repo.empty() && Rev.empty()) return ""; - if (!S.empty() && !T.empty()) - return "(" + S + " " + T + ")"; - if (!S.empty()) - return "(" + S + ")"; - return "(" + T + ")"; + if (!Repo.empty() && !Rev.empty()) + return " (" + Repo + " " + Rev + ")"; + return " (" + Repo + Rev + ")"; } -StringRef getLLDVersion() { -#ifdef LLD_VERSION_STRING - return LLD_VERSION_STRING; -#else - return ""; -#endif +// Returns a version string, e.g., "LLD 4.0 (lld/trunk 284614)". +std::string lld::getLLDVersion() { + return "LLD " + std::string(LLD_VERSION_STRING) + getRepository(); } - -} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt b/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt index 41e0e7661b9c..d89ca4a63d72 100644 --- a/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt +++ b/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt @@ -4,6 +4,7 @@ add_lld_library(lldCore File.cpp LinkingContext.cpp Reader.cpp + Reproduce.cpp Resolver.cpp SymbolTable.cpp Writer.cpp diff --git a/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp b/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp index 8dc4d4a16f96..177cae7fcbf0 100644 --- a/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp +++ b/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp @@ -79,16 +79,4 @@ DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) { llvm_unreachable("unknown content type"); } -bool DefinedAtom::compareByPosition(const DefinedAtom *lhs, - const DefinedAtom *rhs) { - if (lhs == rhs) - return false; - const File *lhsFile = &lhs->file(); - const File *rhsFile = &rhs->file(); - if (lhsFile->ordinal() != rhsFile->ordinal()) - return lhsFile->ordinal() < rhsFile->ordinal(); - assert(lhs->ordinal() != 
rhs->ordinal()); - return lhs->ordinal() < rhs->ordinal(); -} - } // namespace diff --git a/contrib/llvm/tools/lld/lib/Core/Error.cpp b/contrib/llvm/tools/lld/lib/Core/Error.cpp index 4df1ce120bd9..6fc76f7ca3d0 100644 --- a/contrib/llvm/tools/lld/lib/Core/Error.cpp +++ b/contrib/llvm/tools/lld/lib/Core/Error.cpp @@ -16,9 +16,10 @@ using namespace lld; +namespace { class _YamlReaderErrorCategory : public std::error_category { public: - const char* name() const LLVM_NOEXCEPT override { + const char* name() const noexcept override { return "lld.yaml.reader"; } @@ -33,6 +34,7 @@ public: "message defined."); } }; +} // end anonymous namespace const std::error_category &lld::YamlReaderCategory() { static _YamlReaderErrorCategory o; @@ -48,7 +50,7 @@ class dynamic_error_category : public std::error_category { public: ~dynamic_error_category() override = default; - const char *name() const LLVM_NOEXCEPT override { + const char *name() const noexcept override { return "lld.dynamic_error"; } diff --git a/contrib/llvm/tools/lld/lib/Core/File.cpp b/contrib/llvm/tools/lld/lib/Core/File.cpp index b84132bfecd5..30ded091a92a 100644 --- a/contrib/llvm/tools/lld/lib/Core/File.cpp +++ b/contrib/llvm/tools/lld/lib/Core/File.cpp @@ -8,12 +8,11 @@ //===----------------------------------------------------------------------===// #include "lld/Core/File.h" -#include "lld/Core/LLVM.h" #include <mutex> namespace lld { -File::~File() { } +File::~File() = default; File::AtomVector<DefinedAtom> File::_noDefinedAtoms; File::AtomVector<UndefinedAtom> File::_noUndefinedAtoms; @@ -27,4 +26,4 @@ std::error_code File::parse() { return _lastError.getValue(); } -} // namespace lld +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp b/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp index 2732543d306e..5de863aa7f37 100644 --- a/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp +++ b/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp @@ -8,16 +8,17 @@ 
//===----------------------------------------------------------------------===// #include "lld/Core/LinkingContext.h" -#include "lld/Core/Resolver.h" +#include "lld/Core/File.h" +#include "lld/Core/Node.h" #include "lld/Core/Simple.h" #include "lld/Core/Writer.h" -#include "llvm/ADT/Triple.h" +#include <algorithm> namespace lld { -LinkingContext::LinkingContext() {} +LinkingContext::LinkingContext() = default; -LinkingContext::~LinkingContext() {} +LinkingContext::~LinkingContext() = default; bool LinkingContext::validate(raw_ostream &diagnostics) { return validateImpl(diagnostics); @@ -59,7 +60,7 @@ LinkingContext::createUndefinedSymbolFile(StringRef filename) const { } void LinkingContext::createInternalFiles( - std::vector<std::unique_ptr<File> > &result) const { + std::vector<std::unique_ptr<File>> &result) const { if (std::unique_ptr<File> file = createEntrySymbolFile()) result.push_back(std::move(file)); if (std::unique_ptr<File> file = createUndefinedSymbolFile()) diff --git a/contrib/llvm/tools/lld/lib/Core/Reader.cpp b/contrib/llvm/tools/lld/lib/Core/Reader.cpp index 107db07891da..24652abec688 100644 --- a/contrib/llvm/tools/lld/lib/Core/Reader.cpp +++ b/contrib/llvm/tools/lld/lib/Core/Reader.cpp @@ -7,18 +7,19 @@ // //===----------------------------------------------------------------------===// -#include "lld/Core/File.h" #include "lld/Core/Reader.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include <algorithm> #include <memory> -#include <system_error> namespace lld { -YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() {} +YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() = default; void Registry::add(std::unique_ptr<Reader> reader) { _readers.push_back(std::move(reader)); @@ -63,7 +64,6 @@ bool Registry::handleTaggedDoc(llvm::yaml::IO 
&io, return false; } - void Registry::addKindTable(Reference::KindNamespace ns, Reference::KindArch arch, const KindStrings array[]) { diff --git a/contrib/llvm/tools/lld/lib/Core/Reproduce.cpp b/contrib/llvm/tools/lld/lib/Core/Reproduce.cpp new file mode 100644 index 000000000000..39b0e41c44e5 --- /dev/null +++ b/contrib/llvm/tools/lld/lib/Core/Reproduce.cpp @@ -0,0 +1,128 @@ +//===- Reproduce.cpp - Utilities for creating reproducers -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Reproduce.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Option/Arg.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" + +using namespace lld; +using namespace llvm; +using namespace sys; + +CpioFile::CpioFile(std::unique_ptr<raw_fd_ostream> OS, StringRef S) + : OS(std::move(OS)), Basename(S) {} + +ErrorOr<CpioFile *> CpioFile::create(StringRef OutputPath) { + std::string Path = (OutputPath + ".cpio").str(); + std::error_code EC; + auto OS = llvm::make_unique<raw_fd_ostream>(Path, EC, sys::fs::F_None); + if (EC) + return EC; + return new CpioFile(std::move(OS), path::filename(OutputPath)); +} + +static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) { + // The c_dev/c_ino pair should be unique according to the spec, + // but no one seems to care. 
+ OS << "070707"; // c_magic + OS << "000000"; // c_dev + OS << "000000"; // c_ino + OS << "100664"; // c_mode: C_ISREG | rw-rw-r-- + OS << "000000"; // c_uid + OS << "000000"; // c_gid + OS << "000001"; // c_nlink + OS << "000000"; // c_rdev + OS << "00000000000"; // c_mtime + OS << format("%06o", Path.size() + 1); // c_namesize + OS << format("%011o", Data.size()); // c_filesize + OS << Path << '\0'; // c_name + OS << Data; // c_filedata +} + +void CpioFile::append(StringRef Path, StringRef Data) { + if (!Seen.insert(Path).second) + return; + + // Construct an in-archive filename so that /home/foo/bar is stored + // as baz/home/foo/bar where baz is the basename of the output file. + // (i.e. in that case we are creating baz.cpio.) + SmallString<128> Fullpath; + path::append(Fullpath, Basename, Path); + + writeMember(*OS, convertToUnixPathSeparator(Fullpath), Data); + + // Print the trailer and seek back. + // This way we have a valid archive if we crash. + uint64_t Pos = OS->tell(); + writeMember(*OS, "TRAILER!!!", ""); + OS->seek(Pos); +} + +// Makes a given pathname an absolute path first, and then remove +// beginning /. For example, "../foo.o" is converted to "home/john/foo.o", +// assuming that the current directory is "/home/john/bar". +// Returned string is a forward slash separated path even on Windows to avoid +// a mess with backslash-as-escape and backslash-as-path-separator. +std::string lld::relativeToRoot(StringRef Path) { + SmallString<128> Abs = Path; + if (sys::fs::make_absolute(Abs)) + return Path; + path::remove_dots(Abs, /*remove_dot_dot=*/true); + + // This is Windows specific. root_name() returns a drive letter + // (e.g. "c:") or a UNC name (//net). We want to keep it as part + // of the result. 
+ SmallString<128> Res; + StringRef Root = path::root_name(Abs); + if (Root.endswith(":")) + Res = Root.drop_back(); + else if (Root.startswith("//")) + Res = Root.substr(2); + + path::append(Res, path::relative_path(Abs)); + return convertToUnixPathSeparator(Res); +} + +// Quote a given string if it contains a space character. +std::string lld::quote(StringRef S) { + if (S.find(' ') == StringRef::npos) + return S; + return ("\"" + S + "\"").str(); +} + +std::string lld::rewritePath(StringRef S) { + if (fs::exists(S)) + return relativeToRoot(S); + return S; +} + +std::string lld::stringize(opt::Arg *Arg) { + std::string K = Arg->getSpelling(); + if (Arg->getNumValues() == 0) + return K; + std::string V = quote(Arg->getValue()); + if (Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle) + return K + V; + return K + " " + V; +} + +std::string lld::convertToUnixPathSeparator(StringRef S) { +#ifdef LLVM_ON_WIN32 + std::string Ret = S.str(); + std::replace(Ret.begin(), Ret.end(), '\\', '/'); + return Ret; +#else + return S; +#endif +} diff --git a/contrib/llvm/tools/lld/lib/Core/Resolver.cpp b/contrib/llvm/tools/lld/lib/Core/Resolver.cpp index ef694fd972fc..e7cfaaac7835 100644 --- a/contrib/llvm/tools/lld/lib/Core/Resolver.cpp +++ b/contrib/llvm/tools/lld/lib/Core/Resolver.cpp @@ -100,7 +100,7 @@ llvm::Error Resolver::handleSharedLibrary(File &file) { if (auto ec = undefAddedOrError.takeError()) return ec; - return llvm::Error(); + return llvm::Error::success(); } bool Resolver::doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom) { diff --git a/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp b/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp index 44631a5d40dc..cacea5f30847 100644 --- a/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp @@ -223,13 +223,9 @@ bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, const DefinedAtom * const r) { if (l == r) return true; - if (l == 
getEmptyKey()) + if (l == getEmptyKey() || r == getEmptyKey()) return false; - if (r == getEmptyKey()) - return false; - if (l == getTombstoneKey()) - return false; - if (r == getTombstoneKey()) + if (l == getTombstoneKey() || r == getTombstoneKey()) return false; if (l->contentType() != r->contentType()) return false; @@ -265,17 +261,6 @@ const Atom *SymbolTable::findByName(StringRef sym) { return pos->second; } -bool SymbolTable::isDefined(StringRef sym) { - if (const Atom *atom = findByName(sym)) - return !isa<UndefinedAtom>(atom); - return false; -} - -void SymbolTable::addReplacement(const Atom *replaced, - const Atom *replacement) { - _replacedAtoms[replaced] = replacement; -} - const Atom *SymbolTable::replacement(const Atom *atom) { // Find the replacement for a given atom. Atoms in _replacedAtoms // may be chained, so find the last one. @@ -303,17 +288,4 @@ std::vector<const UndefinedAtom *> SymbolTable::undefines() { return ret; } -std::vector<StringRef> SymbolTable::tentativeDefinitions() { - std::vector<StringRef> ret; - for (auto entry : _nameTable) { - const Atom *atom = entry.second; - StringRef name = entry.first; - assert(atom != nullptr); - if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) - if (defAtom->merge() == DefinedAtom::mergeAsTentative) - ret.push_back(name); - } - return ret; -} - } // namespace lld diff --git a/contrib/llvm/tools/lld/lib/Core/Writer.cpp b/contrib/llvm/tools/lld/lib/Core/Writer.cpp index 93e6438a28f5..51f95bc5053a 100644 --- a/contrib/llvm/tools/lld/lib/Core/Writer.cpp +++ b/contrib/llvm/tools/lld/lib/Core/Writer.cpp @@ -7,13 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "lld/Core/File.h" #include "lld/Core/Writer.h" namespace lld { -Writer::Writer() { -} -Writer::~Writer() { -} +Writer::Writer() = default; + +Writer::~Writer() = default; + } // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp 
b/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp index 496b651bab4f..9b4aede19aa2 100644 --- a/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp +++ b/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp @@ -14,24 +14,45 @@ //===----------------------------------------------------------------------===// #include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Error.h" #include "lld/Core/File.h" #include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Node.h" #include "lld/Core/PassManager.h" #include "lld/Core/Resolver.h" #include "lld/Core/SharedLibraryFile.h" -#include "lld/Driver/Driver.h" +#include "lld/Core/Simple.h" +#include "lld/Core/LinkingContext.h" #include "lld/ReaderWriter/MachOLinkingContext.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <utility> +#include <vector> using namespace lld; @@ -116,7 +137,7 @@ loadFile(MachOLinkingContext &ctx, StringRef path, return files; } -} // anonymous namespace +} // end anonymous namespace // Test may be running on Windows. Canonicalize the path // separator to '/' to get consistent outputs for tests. 
@@ -166,8 +187,6 @@ static std::error_code parseExportsList(StringRef exportFilePath, return std::error_code(); } - - /// Order files are one symbol per line. Blank lines are ignored. /// Trailing comments start with #. Symbol names can be prefixed with an /// architecture name and/or .o leaf name. Examples: @@ -270,7 +289,7 @@ static llvm::Error loadFileList(StringRef fileListPath, addFile(path, ctx, forceLoad, false, diagnostics); buffer = lineAndRest.second; } - return llvm::Error(); + return llvm::Error::success(); } /// Parse number assuming it is base 16, but allow 0x prefix. @@ -739,9 +758,10 @@ bool parse(llvm::ArrayRef<const char *> args, MachOLinkingContext &ctx, } break; case MachOLinkingContext::OS::iOS_simulator: - if (pie->getOption().getID() == OPT_no_pie) + if (pie->getOption().getID() == OPT_no_pie) { diagnostics << "iOS simulator programs must be built PIE\n"; return false; + } break; case MachOLinkingContext::OS::unknown: break; @@ -759,7 +779,6 @@ bool parse(llvm::ArrayRef<const char *> args, MachOLinkingContext &ctx, diagnostics << pie->getSpelling() << " can only used when linking main executables\n"; return false; - break; } } @@ -1136,6 +1155,18 @@ bool parse(llvm::ArrayRef<const char *> args, MachOLinkingContext &ctx, return ctx.validate(diagnostics); } +static void createFiles(MachOLinkingContext &ctx, bool Implicit) { + std::vector<std::unique_ptr<File>> Files; + if (Implicit) + ctx.createImplicitFiles(Files); + else + ctx.createInternalFiles(Files); + for (auto i = Files.rbegin(), e = Files.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); + } +} + /// This is where the link is actually performed. 
bool link(llvm::ArrayRef<const char *> args, raw_ostream &diagnostics) { MachOLinkingContext ctx; @@ -1150,20 +1181,10 @@ bool link(llvm::ArrayRef<const char *> args, raw_ostream &diagnostics) { if (FileNode *node = dyn_cast<FileNode>(ie.get())) node->getFile()->parse(); - std::vector<std::unique_ptr<File>> internalFiles; - ctx.createInternalFiles(internalFiles); - for (auto i = internalFiles.rbegin(), e = internalFiles.rend(); i != e; ++i) { - auto &members = ctx.getNodes(); - members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); - } + createFiles(ctx, false /* Implicit */); - // Give target a chance to add files. - std::vector<std::unique_ptr<File>> implicitFiles; - ctx.createImplicitFiles(implicitFiles); - for (auto i = implicitFiles.rbegin(), e = implicitFiles.rend(); i != e; ++i) { - auto &members = ctx.getNodes(); - members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); - } + // Give target a chance to add files + createFiles(ctx, true /* Implicit */); // Give target a chance to postprocess input files. // Mach-O uses this chance to move all object files before library files. @@ -1211,5 +1232,6 @@ bool link(llvm::ArrayRef<const char *> args, raw_ostream &diagnostics) { return true; } -} // namespace mach_o -} // namespace lld + +} // end namespace mach_o +} // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp index eb7e7fb1837b..799f947a8c82 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp @@ -52,9 +52,12 @@ public: Archive::Child c = member->second; // Don't return a member already returned - ErrorOr<StringRef> buf = c.getBuffer(); - if (!buf) + Expected<StringRef> buf = c.getBuffer(); + if (!buf) { + // TODO: Actually report errors helpfully. 
+ consumeError(buf.takeError()); return nullptr; + } const char *memberStart = buf->data(); if (_membersInstantiated.count(memberStart)) return nullptr; @@ -76,7 +79,7 @@ public: parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { if (std::error_code ec = parse()) return ec; - llvm::Error err; + llvm::Error err = llvm::Error::success(); for (auto mf = _archive->child_begin(err), me = _archive->child_end(); mf != me; ++mf) { std::unique_ptr<File> file; @@ -119,7 +122,7 @@ public: protected: std::error_code doParse() override { // Make Archive object which will be owned by FileArchive object. - llvm::Error Err; + llvm::Error Err = llvm::Error::success(); _archive.reset(new Archive(_mb->getMemBufferRef(), Err)); if (Err) return errorToErrorCode(std::move(Err)); @@ -132,9 +135,9 @@ protected: private: std::error_code instantiateMember(Archive::Child member, std::unique_ptr<File> &result) const { - ErrorOr<llvm::MemoryBufferRef> mbOrErr = member.getMemoryBufferRef(); - if (std::error_code ec = mbOrErr.getError()) - return ec; + Expected<llvm::MemoryBufferRef> mbOrErr = member.getMemoryBufferRef(); + if (!mbOrErr) + return errorToErrorCode(mbOrErr.takeError()); llvm::MemoryBufferRef mb = mbOrErr.get(); std::string memberPath = (_archive->getFileName() + "(" + mb.getBufferIdentifier() + ")").str(); @@ -166,9 +169,9 @@ private: << _archive->getFileName() << "':\n"); for (const Archive::Symbol &sym : _archive->symbols()) { StringRef name = sym.getName(); - ErrorOr<Archive::Child> memberOrErr = sym.getMember(); - if (std::error_code ec = memberOrErr.getError()) - return ec; + Expected<Archive::Child> memberOrErr = sym.getMember(); + if (!memberOrErr) + return errorToErrorCode(memberOrErr.takeError()); Archive::Child member = memberOrErr.get(); DEBUG_WITH_TYPE("FileArchive", llvm::dbgs() diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp index 
3286fe064535..7d1544854cf1 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp @@ -540,7 +540,7 @@ llvm::Error ArchHandler_arm::getReferenceInfo( // Instruction contains branch to addend. displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); *addend = fixupAddress + 4 + displacement; - return llvm::Error(); + return llvm::Error::success(); case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4: // ex: bl _foo (and _foo is defined) if ((instruction & 0xD000F800) == 0x9000F000) @@ -563,7 +563,7 @@ llvm::Error ArchHandler_arm::getReferenceInfo( // reloc.value is target atom's address. Instruction contains branch // to atom+addend. *addend += (targetAddress - reloc.value); - return llvm::Error(); + return llvm::Error::success(); case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4: // ex: bl _foo (and _foo is undefined) if (((instruction & 0x0F000000) == 0x0A000000) @@ -576,7 +576,7 @@ llvm::Error ArchHandler_arm::getReferenceInfo( // Instruction contains branch to addend. displacement = getDisplacementFromArmBranch(instruction); *addend = fixupAddress + 8 + displacement; - return llvm::Error(); + return llvm::Error::success(); case ARM_RELOC_BR24 | rPcRel | rLength4: // ex: bl _foo (and _foo is defined) if (((instruction & 0x0F000000) == 0x0A000000) @@ -601,32 +601,32 @@ llvm::Error ArchHandler_arm::getReferenceInfo( // reloc.value is target atom's address. Instruction contains branch // to atom+addend. 
*addend += (targetAddress - reloc.value); - return llvm::Error(); + return llvm::Error::success(); case ARM_RELOC_VANILLA | rExtern | rLength4: // ex: .long _foo (and _foo is undefined) *kind = pointer32; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = instruction; - return llvm::Error(); + return llvm::Error::success(); case ARM_RELOC_VANILLA | rLength4: // ex: .long _foo (and _foo is defined) *kind = pointer32; if (auto ec = atomFromAddress(reloc.symbol, instruction, target, addend)) return ec; *addend = clearThumbBit((uint32_t) * addend, *target); - return llvm::Error(); + return llvm::Error::success(); case ARM_RELOC_VANILLA | rScattered | rLength4: // ex: .long _foo+a (and _foo is defined) *kind = pointer32; if (auto ec = atomFromAddress(0, reloc.value, target, addend)) return ec; *addend += (clearThumbBit(instruction, *target) - reloc.value); - return llvm::Error(); + return llvm::Error::success(); default: return llvm::make_error<GenericError>("unsupported arm relocation type"); } - return llvm::Error(); + return llvm::Error::success(); } llvm::Error @@ -847,7 +847,7 @@ ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, value = clearThumbBit(value, *target); int64_t ta = (int64_t) value - (toAddress - fromAddress); *addend = ta - offsetInFrom; - return llvm::Error(); + return llvm::Error::success(); } else { uint32_t sectIndex; if (thumbReloc) { @@ -895,7 +895,7 @@ ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, } } - return llvm::Error(); + return llvm::Error::success(); } void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc, diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp index a61f6aac05e1..392a1be5b3d0 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp +++ 
b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp @@ -275,8 +275,8 @@ const ArchHandler::StubInfo ArchHandler_arm64::_sStubInfo = { // GOT pointer to dyld_stub_binder { Reference::KindArch::AArch64, pointer64, 0, 0 }, - // arm64 code alignment 2^2 - 2, + // arm64 code alignment 2^1 + 1, // Stub size and code 12, @@ -389,56 +389,56 @@ llvm::Error ArchHandler_arm64::getReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4: // ex: adrp x1, _foo@PAGE *kind = page21; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_PAGEOFF12 | rExtern | rLength4: // ex: ldr x0, [x1, _foo@PAGEOFF] *kind = offset12KindFromInstruction(*(const little32_t *)fixupContent); if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4: // ex: adrp x1, _foo@GOTPAGE *kind = gotPage21; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4: // ex: ldr x0, [x1, _foo@GOTPAGEOFF] *kind = gotOffset12; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4: // ex: adrp x1, _foo@TLVPAGE *kind = tlvPage21; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4: // ex: ldr x0, [x1, _foo@TLVPAGEOFF] *kind = tlvOffset12; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) 
return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_UNSIGNED | rExtern | rLength8: // ex: .quad _foo + N *kind = pointer64; if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = *(const little64_t *)fixupContent; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_UNSIGNED | rLength8: // ex: .quad Lfoo + N *kind = pointer64; @@ -450,7 +450,7 @@ llvm::Error ArchHandler_arm64::getReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4: // ex: .long _foo@GOT - . @@ -464,7 +464,7 @@ llvm::Error ArchHandler_arm64::getReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = 0; - return llvm::Error(); + return llvm::Error::success(); default: return llvm::make_error<GenericError>("unsupported arm64 relocation type"); } @@ -485,7 +485,7 @@ llvm::Error ArchHandler_arm64::getPairReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) return ec; *addend = reloc1.symbol; - return llvm::Error(); + return llvm::Error::success(); case ((ARM64_RELOC_ADDEND | rLength4) << 16 | ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4): // ex: adrp x1, _foo@PAGE @@ -493,7 +493,7 @@ llvm::Error ArchHandler_arm64::getPairReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) return ec; *addend = reloc1.symbol; - return llvm::Error(); + return llvm::Error::success(); case ((ARM64_RELOC_ADDEND | rLength4) << 16 | ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): { // ex: ldr w0, [x1, _foo@PAGEOFF] @@ -502,7 +502,7 @@ llvm::Error ArchHandler_arm64::getPairReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) return ec; *addend = reloc1.symbol; - return llvm::Error(); + return llvm::Error::success(); } case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | 
ARM64_RELOC_UNSIGNED | rExtern | rLength8): @@ -522,7 +522,7 @@ llvm::Error ArchHandler_arm64::getPairReferenceInfo( return llvm::make_error<GenericError>( "paired relocs must have the same offset"); *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom; - return llvm::Error(); + return llvm::Error::success(); case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | ARM64_RELOC_UNSIGNED | rExtern | rLength4): // ex: .quad _foo - . @@ -530,7 +530,7 @@ llvm::Error ArchHandler_arm64::getPairReferenceInfo( if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) return ec; *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom; - return llvm::Error(); + return llvm::Error::success(); default: return llvm::make_error<GenericError>("unsupported arm64 relocation pair"); } diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp index 15f1f793b5d7..c940ea542ee4 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp @@ -345,7 +345,7 @@ ArchHandler_x86::getReferenceInfo(const Relocation &reloc, default: return llvm::make_error<GenericError>("unsupported i386 relocation type"); } - return llvm::Error(); + return llvm::Error::success(); } llvm::Error @@ -403,7 +403,7 @@ ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1, *addend = fromAddress + value - toAddress; } } - return llvm::Error(); + return llvm::Error::success(); break; default: return llvm::make_error<GenericError>("unsupported i386 relocation type"); diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp index c36982a77b13..d687ca5de5b4 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp +++ 
b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp @@ -382,22 +382,22 @@ ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = *(const little32_t *)fixupContent; - return llvm::Error(); + return llvm::Error::success(); case ripRel32Minus1: if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = (int32_t)*(const little32_t *)fixupContent + 1; - return llvm::Error(); + return llvm::Error::success(); case ripRel32Minus2: if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = (int32_t)*(const little32_t *)fixupContent + 2; - return llvm::Error(); + return llvm::Error::success(); case ripRel32Minus4: if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = (int32_t)*(const little32_t *)fixupContent + 4; - return llvm::Error(); + return llvm::Error::success(); case ripRel32Anon: targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent; return atomFromAddress(reloc.symbol, targetAddress, target, addend); @@ -416,7 +416,7 @@ ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) return ec; *addend = *(const little32_t *)fixupContent; - return llvm::Error(); + return llvm::Error::success(); case tlvInitSectionOffset: case pointer64: if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) @@ -429,7 +429,7 @@ ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, assert(*addend == 0 && "TLV-init has non-zero addend?"); } else *addend = *(const little64_t *)fixupContent; - return llvm::Error(); + return llvm::Error::success(); case pointer64Anon: targetAddress = *(const little64_t *)fixupContent; return atomFromAddress(reloc.symbol, targetAddress, target, addend); @@ -463,7 +463,10 @@ ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, return ec; uint64_t encodedAddend = (int64_t)*(const little64_t 
*)fixupContent; if (inAtom == fromTarget) { - *kind = delta64; + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindFDEToFunction; + else + *kind = delta64; *addend = encodedAddend + offsetInAtom; } else if (inAtom == *target) { *kind = negDelta64; @@ -471,7 +474,7 @@ ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, *target = fromTarget; } else return llvm::make_error<GenericError>("Invalid pointer diff"); - return llvm::Error(); + return llvm::Error::success(); } case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | X86_64_RELOC_UNSIGNED | rExtern | rLength4): { @@ -487,7 +490,7 @@ ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, *target = fromTarget; } else return llvm::make_error<GenericError>("Invalid pointer diff"); - return llvm::Error(); + return llvm::Error::success(); } case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | X86_64_RELOC_UNSIGNED | rLength8): diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt index 70f451c997b3..6a1064d6dfb5 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt @@ -21,8 +21,10 @@ add_lld_library(lldMachO LINK_LIBS lldCore lldYAML + LLVMDebugInfoDWARF LLVMObject LLVMSupport + LLVMDemangle ${PTHREAD_LIB} ) diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp index 6f5ab83dbda6..49d518456a45 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp @@ -298,7 +298,7 @@ private: // Skip rest of pass if no unwind info. 
if (unwindLocs.empty() && dwarfFrames.empty()) - return llvm::Error(); + return llvm::Error::success(); // FIXME: if there are more than 4 personality functions then we need to // defer to DWARF info for the ones we don't put in the list. They should @@ -353,7 +353,7 @@ private: return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; }); - return llvm::Error(); + return llvm::Error::success(); } void collectCompactUnwindEntries( diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h new file mode 100644 index 000000000000..28e41bf4263c --- /dev/null +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h @@ -0,0 +1,106 @@ +//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_DEBUGINFO_H +#define LLD_READER_WRITER_MACHO_DEBUGINFO_H + +#include "lld/Core/Atom.h" +#include <vector> + +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + + +namespace lld { +namespace mach_o { + +class DebugInfo { +public: + enum class Kind { + Dwarf, + Stabs + }; + + Kind kind() const { return _kind; } + + void setAllocator(std::unique_ptr<llvm::BumpPtrAllocator> allocator) { + _allocator = std::move(allocator); + } + +protected: + DebugInfo(Kind kind) : _kind(kind) {} + +private: + std::unique_ptr<llvm::BumpPtrAllocator> _allocator; + Kind _kind; +}; + +struct TranslationUnitSource { + StringRef name; + StringRef path; +}; + +class DwarfDebugInfo : public DebugInfo { +public: + DwarfDebugInfo(TranslationUnitSource tu) + : DebugInfo(Kind::Dwarf), _tu(std::move(tu)) {} + + static inline bool classof(const DebugInfo *di) { + return di->kind() == Kind::Dwarf; + } + + const 
TranslationUnitSource &translationUnitSource() const { return _tu; } + +private: + TranslationUnitSource _tu; +}; + +struct Stab { + Stab(const Atom* atom, uint8_t type, uint8_t other, uint16_t desc, + uint32_t value, StringRef str) + : atom(atom), type(type), other(other), desc(desc), value(value), + str(str) {} + + const class Atom* atom; + uint8_t type; + uint8_t other; + uint16_t desc; + uint32_t value; + StringRef str; +}; + +inline raw_ostream& operator<<(raw_ostream &os, Stab &s) { + os << "Stab -- atom: " << llvm::format("%p", s.atom) << ", type: " << (uint32_t)s.type + << ", other: " << (uint32_t)s.other << ", desc: " << s.desc << ", value: " << s.value + << ", str: '" << s.str << "'"; + return os; +} + +class StabsDebugInfo : public DebugInfo { +public: + + typedef std::vector<Stab> StabsList; + + StabsDebugInfo(StabsList stabs) + : DebugInfo(Kind::Stabs), _stabs(std::move(stabs)) {} + + static inline bool classof(const DebugInfo *di) { + return di->kind() == Kind::Stabs; + } + + const StabsList& stabs() const { return _stabs; } + +public: + StabsList _stabs; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_DEBUGINFO_H diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h index 64a0fcf82844..2bdd6342b477 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h @@ -11,11 +11,13 @@ #define LLD_READER_WRITER_MACHO_FILE_H #include "Atoms.h" +#include "DebugInfo.h" #include "MachONormalizedFile.h" #include "lld/Core/SharedLibraryFile.h" #include "lld/Core/Simple.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/Format.h" #include <unordered_map> namespace lld { @@ -25,11 +27,15 @@ using lld::mach_o::normalized::Section; class MachOFile : public SimpleFile { public: + + /// Real file constructor - for on-disk files. 
MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject), _mb(std::move(mb)), _ctx(ctx) {} - MachOFile(StringRef path) : SimpleFile(path, File::kindMachObject) {} + /// Dummy file constructor - for virtual files. + MachOFile(StringRef path) + : SimpleFile(path, File::kindMachObject) {} void addDefinedAtom(StringRef name, Atom::Scope scope, DefinedAtom::ContentType type, DefinedAtom::Merge merge, @@ -225,6 +231,13 @@ public: return F->kind() == File::kindMachObject; } + void setDebugInfo(std::unique_ptr<DebugInfo> debugInfo) { + _debugInfo = std::move(debugInfo); + } + + DebugInfo* debugInfo() const { return _debugInfo.get(); } + std::unique_ptr<DebugInfo> takeDebugInfo() { return std::move(_debugInfo); } + protected: std::error_code doParse() override { // Convert binary file to normalized mach-o. @@ -265,6 +278,7 @@ private: MachOLinkingContext::objc_unknown; uint32_t _swiftVersion = 0; normalized::FileFlags _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + std::unique_ptr<DebugInfo> _debugInfo; }; class MachODylibFile : public SharedLibraryFile { @@ -297,7 +311,7 @@ public: _reExportedDylibs.emplace_back(dylibPath); } - StringRef installName() { return _installName; } + StringRef installName() const { return _installName; } uint32_t currentVersion() { return _currentVersion; } uint32_t compatVersion() { return _compatVersion; } diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp index 6cdca0a9e055..8458a1c79282 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp @@ -134,7 +134,7 @@ private: for (const GOTEntryAtom *slot : entries) mergedFile.addAtom(*slot); - return llvm::Error(); + return llvm::Error::success(); } bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) { diff --git 
a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp index dd2ee8567ec9..24dbf79d3e3b 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -474,7 +474,7 @@ llvm::Error LayoutPass::perform(SimpleFile &mergedFile) { }); DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n"); - return llvm::Error(); + return llvm::Error::success(); } void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp index 05375f145d34..db4a96823e74 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" -#include "llvm/Config/config.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Host.h" @@ -30,10 +30,6 @@ #include "llvm/Support/Path.h" #include <algorithm> -#if defined(HAVE_CXXABI_H) -#include <cxxabi.h> -#endif - using lld::mach_o::ArchHandler; using lld::mach_o::MachOFile; using lld::mach_o::MachODylibFile; @@ -734,7 +730,7 @@ uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const { if (pos != _pathToDylibMap.end()) return pos->second->currentVersion(); else - return 0x1000; // 1.0 + return 0x10000; // 1.0 } uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { @@ -742,7 +738,7 @@ uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { if (pos != _pathToDylibMap.end()) return pos->second->compatVersion(); else - return 0x1000; // 1.0 + return 0x10000; // 1.0 } void 
MachOLinkingContext::createImplicitFiles( @@ -772,7 +768,10 @@ void MachOLinkingContext::createImplicitFiles( void MachOLinkingContext::registerDylib(MachODylibFile *dylib, bool upward) const { std::lock_guard<std::mutex> lock(_dylibsMutex); - _allDylibs.insert(dylib); + + if (std::find(_allDylibs.begin(), + _allDylibs.end(), dylib) == _allDylibs.end()) + _allDylibs.push_back(dylib); _pathToDylibMap[dylib->installName()] = dylib; // If path is different than install name, register path too. if (!dylib->path().equals(dylib->installName())) @@ -873,24 +872,32 @@ std::string MachOLinkingContext::demangle(StringRef symbolName) const { if (!symbolName.startswith("__Z")) return symbolName; -#if defined(HAVE_CXXABI_H) SmallString<256> symBuff; StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); // Mach-O has extra leading underscore that needs to be removed. const char *cstr = nullTermSym.data() + 1; int status; - char *demangled = abi::__cxa_demangle(cstr, nullptr, nullptr, &status); + char *demangled = llvm::itaniumDemangle(cstr, nullptr, nullptr, &status); if (demangled) { std::string result(demangled); // __cxa_demangle() always uses a malloc'ed buffer to return the result. 
free(demangled); return result; } -#endif return symbolName; } +static void addDependencyInfoHelper(llvm::raw_fd_ostream *DepInfo, + char Opcode, StringRef Path) { + if (!DepInfo) + return; + + *DepInfo << Opcode; + *DepInfo << Path; + *DepInfo << '\0'; +} + std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { std::error_code ec; _dependencyInfo = std::unique_ptr<llvm::raw_fd_ostream>(new @@ -900,42 +907,20 @@ std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { return ec; } - char linkerVersionOpcode = 0x00; - *_dependencyInfo << linkerVersionOpcode; - *_dependencyInfo << "lld"; // FIXME - *_dependencyInfo << '\0'; - + addDependencyInfoHelper(_dependencyInfo.get(), 0x00, "lld" /*FIXME*/); return std::error_code(); } void MachOLinkingContext::addInputFileDependency(StringRef path) const { - if (!_dependencyInfo) - return; - - char inputFileOpcode = 0x10; - *_dependencyInfo << inputFileOpcode; - *_dependencyInfo << path; - *_dependencyInfo << '\0'; + addDependencyInfoHelper(_dependencyInfo.get(), 0x10, path); } void MachOLinkingContext::addInputFileNotFound(StringRef path) const { - if (!_dependencyInfo) - return; - - char inputFileOpcode = 0x11; - *_dependencyInfo << inputFileOpcode; - *_dependencyInfo << path; - *_dependencyInfo << '\0'; + addDependencyInfoHelper(_dependencyInfo.get(), 0x11, path); } void MachOLinkingContext::addOutputFileDependency(StringRef path) const { - if (!_dependencyInfo) - return; - - char outputFileOpcode = 0x40; - *_dependencyInfo << outputFileOpcode; - *_dependencyInfo << path; - *_dependencyInfo << '\0'; + addDependencyInfoHelper(_dependencyInfo.get(), 0x40, path); } void MachOLinkingContext::appendOrderedSymbol(StringRef symbol, @@ -1044,7 +1029,7 @@ void MachOLinkingContext::finalizeInputFiles() { llvm::Error MachOLinkingContext::handleLoadedFile(File &file) { auto *machoFile = dyn_cast<MachOFile>(&file); if (!machoFile) - return llvm::Error(); + return llvm::Error::success(); // Check 
that the arch of the context matches that of the file. // Also set the arch of the context if it didn't have one. @@ -1111,7 +1096,7 @@ llvm::Error MachOLinkingContext::handleLoadedFile(File &file) { return llvm::make_error<GenericError>("different swift versions"); } - return llvm::Error(); + return llvm::Error::success(); } } // end namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h index 92a21f7ef83d..60d76d4b5c9b 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h @@ -42,6 +42,7 @@ #ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H #define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H +#include "DebugInfo.h" #include "lld/Core/Error.h" #include "lld/Core/LLVM.h" #include "lld/ReaderWriter/MachOLinkingContext.h" @@ -226,7 +227,6 @@ struct DataInCode { DataRegionType kind; }; - /// A typedef so that YAML I/O can encode/decode mach_header.flags. LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags) @@ -242,6 +242,7 @@ struct NormalizedFile { std::vector<Symbol> localSymbols; std::vector<Symbol> globalSymbols; std::vector<Symbol> undefinedSymbols; + std::vector<Symbol> stabsSymbols; // Maps to load commands with no LINKEDIT content (final linked images only). 
std::vector<DependentDylib> dependentDylibs; diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp index a17de5be1742..23c7ea17f7e7 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp @@ -70,12 +70,12 @@ static llvm::Error forEachLoadCommand( return llvm::make_error<GenericError>("Load command exceeds range"); if (func(slc->cmd, slc->cmdsize, p)) - return llvm::Error(); + return llvm::Error::success(); p += slc->cmdsize; } - return llvm::Error(); + return llvm::Error::success(); } static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, @@ -390,12 +390,14 @@ readBinary(std::unique_ptr<MemoryBuffer> &mb, if (sin->n_strx > strSize) return true; sout.name = &strings[sin->n_strx]; - sout.type = (NListType)(sin->n_type & N_TYPE); + sout.type = static_cast<NListType>(sin->n_type & (N_STAB|N_TYPE)); sout.scope = (sin->n_type & (N_PEXT|N_EXT)); sout.sect = sin->n_sect; sout.desc = sin->n_desc; sout.value = sin->n_value; - if (sout.type == N_UNDF) + if (sin->n_type & N_STAB) + f->stabsSymbols.push_back(sout); + else if (sout.type == N_UNDF) f->undefinedSymbols.push_back(sout); else if (sin->n_type & N_EXT) f->globalSymbols.push_back(sout); @@ -429,6 +431,8 @@ readBinary(std::unique_ptr<MemoryBuffer> &mb, f->undefinedSymbols.push_back(sout); else if (sout.scope == (SymbolScope)N_EXT) f->globalSymbols.push_back(sout); + else if (sin->n_type & N_STAB) + f->stabsSymbols.push_back(sout); else f->localSymbols.push_back(sout); } @@ -535,7 +539,7 @@ public: loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry ®istry) const override { std::unique_ptr<File> ret = - llvm::make_unique<MachOFile>(std::move(mb), &_ctx); + llvm::make_unique<MachOFile>(std::move(mb), &_ctx); return std::move(ret); } diff --git 
a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h index 86823efa33c9..d69c5389e9d6 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h @@ -71,22 +71,19 @@ using llvm::sys::getSwappedBytes; template<typename T> static inline uint16_t read16(const T *loc, bool isBig) { - assert((uint64_t)loc % llvm::alignOf<T>() == 0 && - "invalid pointer alignment"); + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); return isBig ? read16be(loc) : read16le(loc); } template<typename T> static inline uint32_t read32(const T *loc, bool isBig) { - assert((uint64_t)loc % llvm::alignOf<T>() == 0 && - "invalid pointer alignment"); + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); return isBig ? read32be(loc) : read32le(loc); } template<typename T> static inline uint64_t read64(const T *loc, bool isBig) { - assert((uint64_t)loc % llvm::alignOf<T>() == 0 && - "invalid pointer alignment"); + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); return isBig ? 
read64be(loc) : read64le(loc); } diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp index f3e159684e15..e853faf9112e 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -66,26 +66,9 @@ struct TrieEdge : public llvm::ilist_node<TrieEdge> { namespace llvm { - using lld::mach_o::normalized::TrieEdge; - template <> - struct ilist_traits<TrieEdge> - : public ilist_default_traits<TrieEdge> { - private: - mutable ilist_half_node<TrieEdge> Sentinel; - public: - TrieEdge *createSentinel() const { - return static_cast<TrieEdge*>(&Sentinel); - } - void destroySentinel(TrieEdge *) const {} - - TrieEdge *provideInitialHead() const { return createSentinel(); } - TrieEdge *ensureHead(TrieEdge*) const { return createSentinel(); } - static void noteHead(TrieEdge*, TrieEdge*) {} - void deleteNode(TrieEdge *N) {} - - private: - void createNode(const TrieEdge &); - }; +using lld::mach_o::normalized::TrieEdge; +template <> +struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {}; } // namespace llvm @@ -103,6 +86,9 @@ struct TrieNode { void addSymbol(const Export &entry, BumpPtrAllocator &allocator, std::vector<TrieNode *> &allNodes); + + void addOrderedNodes(const Export &entry, + std::vector<TrieNode *> &allNodes); bool updateOffset(uint32_t &offset); void appendToByteBuffer(ByteBuffer &out); @@ -115,6 +101,7 @@ private: StringRef _importedName; uint32_t _trieOffset; bool _hasExportInfo; + bool _ordered = false; }; /// Utility class for writing a mach-o binary file given an in-memory @@ -639,7 +626,9 @@ llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { seg->vmsize = _file.sections.back().address + _file.sections.back().content.size(); seg->fileoff = _endOfLoadCommands; - seg->filesize = 
seg->vmsize; + seg->filesize = _sectInfo[&_file.sections.back()].fileOffset + + _file.sections.back().content.size() - + _sectInfo[&_file.sections.front()].fileOffset; seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; seg->nsects = _file.sections.size(); @@ -668,7 +657,7 @@ llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { ++sout; } lc = next; - return llvm::Error(); + return llvm::Error::success(); } template <typename T> @@ -738,7 +727,7 @@ llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { } lc = reinterpret_cast<uint8_t*>(next); } - return llvm::Error(); + return llvm::Error::success(); } static void writeVersionMinLoadCommand(const NormalizedFile &_file, @@ -789,8 +778,8 @@ llvm::Error MachOFileLayout::writeLoadCommands() { st->cmd = LC_SYMTAB; st->cmdsize = sizeof(symtab_command); st->symoff = _startOfSymbols; - st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() - + _file.undefinedSymbols.size(); + st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + + _file.globalSymbols.size() + _file.undefinedSymbols.size(); st->stroff = _startOfSymbolStrings; st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; if (_swap) @@ -876,8 +865,8 @@ llvm::Error MachOFileLayout::writeLoadCommands() { st->cmd = LC_SYMTAB; st->cmdsize = sizeof(symtab_command); st->symoff = _startOfSymbols; - st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() - + _file.undefinedSymbols.size(); + st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + + _file.globalSymbols.size() + _file.undefinedSymbols.size(); st->stroff = _startOfSymbolStrings; st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; if (_swap) @@ -890,7 +879,8 @@ llvm::Error MachOFileLayout::writeLoadCommands() { dst->cmd = LC_DYSYMTAB; dst->cmdsize = sizeof(dysymtab_command); dst->ilocalsym = _symbolTableLocalsStartIndex; - dst->nlocalsym = 
_file.localSymbols.size(); + dst->nlocalsym = _file.stabsSymbols.size() + + _file.localSymbols.size(); dst->iextdefsym = _symbolTableGlobalsStartIndex; dst->nextdefsym = _file.globalSymbols.size(); dst->iundefsym = _symbolTableUndefinesStartIndex; @@ -1017,7 +1007,7 @@ llvm::Error MachOFileLayout::writeLoadCommands() { lc += sizeof(linkedit_data_command); } } - return llvm::Error(); + return llvm::Error::success(); } void MachOFileLayout::writeSectionContent() { @@ -1101,7 +1091,10 @@ void MachOFileLayout::writeSymbolTable() { // Write symbol table and symbol strings in parallel. uint32_t symOffset = _startOfSymbols; uint32_t strOffset = _startOfSymbolStrings; - _buffer[strOffset++] = '\0'; // Reserve n_strx offset of zero to mean no name. + // Reserve n_strx offset of zero to mean no name. + _buffer[strOffset++] = ' '; + _buffer[strOffset++] = '\0'; + appendSymbols(_file.stabsSymbols, symOffset, strOffset); appendSymbols(_file.localSymbols, symOffset, strOffset); appendSymbols(_file.globalSymbols, symOffset, strOffset); appendSymbols(_file.undefinedSymbols, symOffset, strOffset); @@ -1182,19 +1175,44 @@ void MachOFileLayout::buildRebaseInfo() { void MachOFileLayout::buildBindInfo() { // TODO: compress bind info. 
uint64_t lastAddend = 0; + int lastOrdinal = 0x80000000; + StringRef lastSymbolName; + BindType lastType = (BindType)0; + Hex32 lastSegOffset = ~0U; + uint8_t lastSegIndex = (uint8_t)~0U; for (const BindLocation& entry : _file.bindingInfo) { - _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); - _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB - | entry.segIndex); - _bindingInfo.append_uleb128(entry.segOffset); - if (entry.ordinal > 0) - _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - (entry.ordinal & 0xF)); - else - _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | - (entry.ordinal & 0xF)); - _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); - _bindingInfo.append_string(entry.symbolName); + if (entry.ordinal != lastOrdinal) { + if (entry.ordinal <= 0) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & BIND_IMMEDIATE_MASK)); + else if (entry.ordinal <= BIND_IMMEDIATE_MASK) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + entry.ordinal); + else { + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + _bindingInfo.append_uleb128(entry.ordinal); + } + lastOrdinal = entry.ordinal; + } + + if (lastSymbolName != entry.symbolName) { + _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _bindingInfo.append_string(entry.symbolName); + lastSymbolName = entry.symbolName; + } + + if (lastType != entry.kind) { + _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + lastType = entry.kind; + } + + if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) { + _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _bindingInfo.append_uleb128(entry.segOffset); + lastSegIndex = entry.segIndex; + lastSegOffset = entry.segOffset; + } if (entry.addend != lastAddend) { _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); _bindingInfo.append_sleb128(entry.addend); @@ -1208,22 
+1226,25 @@ void MachOFileLayout::buildBindInfo() { void MachOFileLayout::buildLazyBindInfo() { for (const BindLocation& entry : _file.lazyBindingInfo) { - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | entry.segIndex); - _lazyBindingInfo.append_uleb128Fixed(entry.segOffset, 5); - if (entry.ordinal > 0) - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - (entry.ordinal & 0xF)); - else + _lazyBindingInfo.append_uleb128(entry.segOffset); + if (entry.ordinal <= 0) _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | - (entry.ordinal & 0xF)); + (entry.ordinal & BIND_IMMEDIATE_MASK)); + else if (entry.ordinal <= BIND_IMMEDIATE_MASK) + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + entry.ordinal); + else { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + _lazyBindingInfo.append_uleb128(entry.ordinal); + } + // FIXME: We need to | the opcode here with flags. _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); _lazyBindingInfo.append_string(entry.symbolName); _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); } - _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); _lazyBindingInfo.align(_is64 ? 8 : 4); } @@ -1287,6 +1308,24 @@ void TrieNode::addSymbol(const Export& entry, allNodes.push_back(newNode); } +void TrieNode::addOrderedNodes(const Export& entry, + std::vector<TrieNode*> &orderedNodes) { + if (!_ordered) { + orderedNodes.push_back(this); + _ordered = true; + } + + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. 
+ edge._child->addOrderedNodes(entry, orderedNodes); + return; + } + } +} + bool TrieNode::updateOffset(uint32_t& offset) { uint32_t nodeSize = 1; // Length when no export info if (_hasExportInfo) { @@ -1392,20 +1431,26 @@ void MachOFileLayout::buildExportTrie() { rootNode->addSymbol(entry, allocator, allNodes); } + std::vector<TrieNode*> orderedNodes; + orderedNodes.reserve(allNodes.size()); + + for (const Export& entry : _file.exportInfo) + rootNode->addOrderedNodes(entry, orderedNodes); + // Assign each node in the vector an offset in the trie stream, iterating // until all uleb128 sizes have stabilized. bool more; do { uint32_t offset = 0; more = false; - for (TrieNode* node : allNodes) { + for (TrieNode* node : orderedNodes) { if (node->updateOffset(offset)) more = true; } } while (more); // Serialize trie to ByteBuffer. - for (TrieNode* node : allNodes) { + for (TrieNode* node : orderedNodes) { node->appendToByteBuffer(_exportTrie); } _exportTrie.align(_is64 ? 8 : 4); @@ -1414,10 +1459,15 @@ void MachOFileLayout::buildExportTrie() { void MachOFileLayout::computeSymbolTableSizes() { // MachO symbol tables have three ranges: locals, globals, and undefines const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); - _symbolTableSize = nlistSize * (_file.localSymbols.size() + _symbolTableSize = nlistSize * (_file.stabsSymbols.size() + + _file.localSymbols.size() + _file.globalSymbols.size() + _file.undefinedSymbols.size()); - _symbolStringPoolSize = 1; // Always reserve 1-byte for the empty string. + // Always reserve 1-byte for the empty string and 1-byte for its terminator. 
+ _symbolStringPoolSize = 2; + for (const Symbol &sym : _file.stabsSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } for (const Symbol &sym : _file.localSymbols) { _symbolStringPoolSize += (sym.name.size()+1); } @@ -1428,7 +1478,8 @@ void MachOFileLayout::computeSymbolTableSizes() { _symbolStringPoolSize += (sym.name.size()+1); } _symbolTableLocalsStartIndex = 0; - _symbolTableGlobalsStartIndex = _file.localSymbols.size(); + _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() + + _file.localSymbols.size(); _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex + _file.globalSymbols.size(); @@ -1486,7 +1537,7 @@ llvm::Error MachOFileLayout::writeBinary(StringRef path) { writeLinkEditContent(); fob->commit(); - return llvm::Error(); + return llvm::Error::success(); } /// Takes in-memory normalized view and writes a mach-o object file. diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp index 4775c75f7211..ddd3259842e2 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -22,6 +22,7 @@ #include "MachONormalizedFile.h" #include "ArchHandler.h" +#include "DebugInfo.h" #include "MachONormalizedFileBinaryUtils.h" #include "lld/Core/Error.h" #include "lld/Core/LLVM.h" @@ -34,6 +35,7 @@ #include "llvm/Support/MachO.h" #include <map> #include <system_error> +#include <unordered_set> using llvm::StringRef; using llvm::isa; @@ -120,6 +122,7 @@ public: void copySectionInfo(NormalizedFile &file); void updateSectionInfo(NormalizedFile &file); void buildAtomToAddressMap(); + llvm::Error synthesizeDebugNotes(NormalizedFile &file); llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file); void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); void addRebaseAndBindingInfo(const 
lld::File &, NormalizedFile &file); @@ -201,6 +204,7 @@ private: bool _allSourceFilesHaveMinVersions = true; LoadCommandType _minVersionCommandType = (LoadCommandType)0; uint32_t _minVersion = 0; + std::vector<lld::mach_o::Stab> _stabs; }; Util::~Util() { @@ -785,6 +789,158 @@ void Util::buildAtomToAddressMap() { } } +llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) { + + // Bail out early if we don't need to generate a debug map. + if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap) + return llvm::Error::success(); + + std::vector<const DefinedAtom*> atomsNeedingDebugNotes; + std::set<const mach_o::MachOFile*> filesWithStabs; + bool objFileHasDwarf = false; + const File *objFile = nullptr; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) { + + // FIXME: No stabs/debug-notes for symbols that wouldn't be in the + // symbol table. + // FIXME: No stabs/debug-notes for kernel dtrace probes. + + if (atom->contentType() == DefinedAtom::typeCFI || + atom->contentType() == DefinedAtom::typeCString) + continue; + + // Whenever we encounter a new file, update the 'objfileHasDwarf' flag. + if (&info.atom->file() != objFile) { + objFileHasDwarf = false; + if (const mach_o::MachOFile *atomFile = + dyn_cast<mach_o::MachOFile>(&info.atom->file())) { + if (atomFile->debugInfo()) { + if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo())) + objFileHasDwarf = true; + else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo())) + filesWithStabs.insert(atomFile); + } + } + } + + // If this atom is from a file that needs dwarf, add it to the list. + if (objFileHasDwarf) + atomsNeedingDebugNotes.push_back(info.atom); + } + } + } + + // Sort atoms needing debug notes by file ordinal, then atom ordinal. 
+ std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(), + [](const DefinedAtom *lhs, const DefinedAtom *rhs) { + if (lhs->file().ordinal() != rhs->file().ordinal()) + return (lhs->file().ordinal() < rhs->file().ordinal()); + return (lhs->ordinal() < rhs->ordinal()); + }); + + // FIXME: Handle <rdar://problem/17689030>: Add -add_ast_path option to \ + // linker which add N_AST stab entry to output + // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64. + + StringRef oldFileName = ""; + StringRef oldDirPath = ""; + bool wroteStartSO = false; + std::unordered_set<std::string> seenFiles; + for (const DefinedAtom *atom : atomsNeedingDebugNotes) { + const auto &atomFile = cast<mach_o::MachOFile>(atom->file()); + assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo()) + && "file for atom needing debug notes does not contain dwarf"); + auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo()); + + auto &tu = dwarf.translationUnitSource(); + StringRef newFileName = tu.name; + StringRef newDirPath = tu.path; + + // Add an SO whenever the TU source file changes. + if (newFileName != oldFileName || newDirPath != oldDirPath) { + // Translation unit change, emit ending SO + if (oldFileName != "") + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); + + oldFileName = newFileName; + oldDirPath = newDirPath; + + // If newDirPath doesn't end with a '/' we need to add one: + if (newDirPath.back() != '/') { + char *p = + file.ownedAllocations.Allocate<char>(newDirPath.size() + 2); + memcpy(p, newDirPath.data(), newDirPath.size()); + p[newDirPath.size()] = '/'; + p[newDirPath.size() + 1] = '\0'; + newDirPath = p; + } + + // New translation unit, emit start SOs: + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath)); + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName)); + + // Synthesize OSO for start of file. 
+ char *fullPath = nullptr; + { + SmallString<1024> pathBuf(atomFile.path()); + if (auto EC = llvm::sys::fs::make_absolute(pathBuf)) + return llvm::errorCodeToError(EC); + fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1); + memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1); + } + + // Get mod time. + uint32_t modTime = 0; + llvm::sys::fs::file_status stat; + if (!llvm::sys::fs::status(fullPath, stat)) + if (llvm::sys::fs::exists(stat)) + modTime = llvm::sys::toTimeT(stat.getLastModificationTime()); + + _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1, + modTime, fullPath)); + // <rdar://problem/6337329> linker should put cpusubtype in n_sect field + // of nlist entry for N_OSO debug note entries. + wroteStartSO = true; + } + + if (atom->contentType() == DefinedAtom::typeCode) { + // Synthesize BNSYM and start FUN stabs. + _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, "")); + _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name())); + // Synthesize any SOL stabs needed + // FIXME: add SOL stabs. + _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0, + atom->rawContent().size(), "")); + _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0, + atom->rawContent().size(), "")); + } else { + if (atom->scope() == Atom::scopeTranslationUnit) + _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name())); + else + _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name())); + } + } + + // Emit ending SO if necessary. + if (wroteStartSO) + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); + + // Copy any stabs from .o file. + for (const auto *objFile : filesWithStabs) { + const auto &stabsList = + cast<mach_o::StabsDebugInfo>(objFile->debugInfo())->stabs(); + for (auto &stab : stabsList) { + // FIXME: Drop stabs whose atoms have been dead-stripped. 
+ _stabs.push_back(stab); + } + } + + return llvm::Error::success(); +} + uint16_t Util::descBits(const DefinedAtom* atom) { uint16_t desc = 0; switch (atom->merge()) { @@ -807,7 +963,8 @@ uint16_t Util::descBits(const DefinedAtom* atom) { desc |= REFERENCED_DYNAMICALLY; if (_archHandler.isThumbFunction(*atom)) desc |= N_ARM_THUMB_DEF; - if (atom->deadStrip() == DefinedAtom::deadStripNever) { + if (atom->deadStrip() == DefinedAtom::deadStripNever && + _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) { if ((atom->contentType() != DefinedAtom::typeInitializerPtr) && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) desc |= N_NO_DEAD_STRIP; @@ -828,7 +985,7 @@ llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, case Atom::scopeTranslationUnit: scope = 0; inGlobalsRegion = false; - return llvm::Error(); + return llvm::Error::success(); case Atom::scopeLinkageUnit: if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) && _ctx.exportSymbolNamed(atom->name())) { @@ -840,38 +997,55 @@ llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, // -keep_private_externs means keep in globals region as N_PEXT. scope = N_PEXT | N_EXT; inGlobalsRegion = true; - return llvm::Error(); + return llvm::Error::success(); } } // scopeLinkageUnit symbols are no longer global once linked. 
scope = N_PEXT; inGlobalsRegion = false; - return llvm::Error(); + return llvm::Error::success(); case Atom::scopeGlobal: if (_ctx.exportRestrictMode()) { if (_ctx.exportSymbolNamed(atom->name())) { scope = N_EXT; inGlobalsRegion = true; - return llvm::Error(); + return llvm::Error::success(); } else { scope = N_PEXT; inGlobalsRegion = false; - return llvm::Error(); + return llvm::Error::success(); } } else { scope = N_EXT; inGlobalsRegion = true; - return llvm::Error(); + return llvm::Error::success(); } break; } llvm_unreachable("atom->scope() unknown enum value"); } + + llvm::Error Util::addSymbols(const lld::File &atomFile, NormalizedFile &file) { bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); - // Mach-O symbol table has three regions: locals, globals, undefs. + // Mach-O symbol table has four regions: stabs, locals, globals, undefs. + + // Add all stabs. + for (auto &stab : _stabs) { + Symbol sym; + sym.type = static_cast<NListType>(stab.type); + sym.scope = 0; + sym.sect = stab.other; + sym.desc = stab.desc; + if (stab.atom) + sym.value = _atomToAddress[stab.atom]; + else + sym.value = stab.value; + sym.name = stab.str; + file.stabsSymbols.push_back(sym); + } // Add all local (non-global) symbols in address order std::vector<AtomAndIndex> globals; @@ -965,7 +1139,7 @@ llvm::Error Util::addSymbols(const lld::File &atomFile, file.undefinedSymbols.push_back(sym); } - return llvm::Error(); + return llvm::Error::success(); } const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { @@ -1040,15 +1214,15 @@ void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { } } -void Util::addDependentDylibs(const lld::File &atomFile,NormalizedFile &nFile) { +void Util::addDependentDylibs(const lld::File &atomFile, + NormalizedFile &nFile) { // Scan all imported symbols and build up list of dylibs they are from. 
int ordinal = 1; - for (const SharedLibraryAtom *slAtom : atomFile.sharedLibrary()) { - StringRef loadPath = slAtom->loadName(); - DylibPathToInfo::iterator pos = _dylibInfo.find(loadPath); + for (const auto *dylib : _ctx.allDylibs()) { + DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName()); if (pos == _dylibInfo.end()) { DylibInfo info; - bool flatNamespaceAtom = &slAtom->file() == _ctx.flatNamespaceFile(); + bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile(); // If we're in -flat_namespace mode (or this atom came from the flat // namespace file under -undefined dynamic_lookup) then use the flat @@ -1057,24 +1231,22 @@ void Util::addDependentDylibs(const lld::File &atomFile,NormalizedFile &nFile) { info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP; else info.ordinal = ordinal++; - info.hasWeak = slAtom->canBeNullAtRuntime(); + info.hasWeak = false; info.hasNonWeak = !info.hasWeak; - _dylibInfo[loadPath] = info; + _dylibInfo[dylib->installName()] = info; // Unless this was a flat_namespace atom, record the source dylib. if (!flatNamespaceAtom) { DependentDylib depInfo; - depInfo.path = loadPath; + depInfo.path = dylib->installName(); depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; - depInfo.currentVersion = _ctx.dylibCurrentVersion(loadPath); - depInfo.compatVersion = _ctx.dylibCompatVersion(loadPath); + depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path()); + depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path()); nFile.dependentDylibs.push_back(depInfo); } } else { - if ( slAtom->canBeNullAtRuntime() ) - pos->second.hasWeak = true; - else - pos->second.hasNonWeak = true; + pos->second.hasWeak = false; + pos->second.hasNonWeak = !pos->second.hasWeak; } } // Automatically weak link dylib in which all symbols are weak (canBeNull). 
@@ -1404,6 +1576,8 @@ normalizedFromAtoms(const lld::File &atomFile, util.copySectionInfo(normFile); util.assignAddressesToSections(normFile); util.buildAtomToAddressMap(); + if (auto err = util.synthesizeDebugNotes(normFile)) + return std::move(err); util.updateSectionInfo(normFile); util.copySectionContent(normFile); if (auto ec = util.addSymbols(atomFile, normFile)) { diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp index fc760a3eddd0..4b17f7b3a85f 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -27,7 +27,11 @@ #include "MachONormalizedFileBinaryUtils.h" #include "lld/Core/Error.h" #include "lld/Core/LLVM.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Support/DataExtractor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/MachO.h" #include "llvm/Support/LEB128.h" @@ -320,7 +324,7 @@ llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, // If section has no symbols and no content, there are no atoms. if (symbols.empty() && section.content.empty()) - return llvm::Error(); + return llvm::Error::success(); if (symbols.empty()) { // Section has no symbols, put all content in one anoymous atom. 
@@ -371,7 +375,7 @@ llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, }); } - return llvm::Error(); + return llvm::Error::success(); } llvm::Error processSection(DefinedAtom::ContentType atomType, @@ -483,7 +487,7 @@ llvm::Error processSection(DefinedAtom::ContentType atomType, offset += size; } } - return llvm::Error(); + return llvm::Error::success(); } const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, @@ -499,7 +503,7 @@ const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, const MachODefinedAtom * findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, - uint64_t addr, Reference::Addend *addend) { + uint64_t addr, Reference::Addend &addend) { const Section *sect = nullptr; sect = findSectionCoveringAddress(normalizedFile, addr); if (!sect) @@ -509,7 +513,7 @@ findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, uint64_t offsetInSect = addr - sect->address; auto atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); - *addend = offsetInTarget; + addend = offsetInTarget; return atom; } @@ -540,7 +544,7 @@ llvm::Error convertRelocs(const Section §ion, uint64_t offsetInSect = addr - sect->address; *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); *addend = offsetInTarget; - return llvm::Error(); + return llvm::Error::success(); }; // Utility function for ArchHandler to find atom by its symbol index. @@ -548,19 +552,23 @@ llvm::Error convertRelocs(const Section §ion, -> llvm::Error { // Find symbol from index. 
const Symbol *sym = nullptr; + uint32_t numStabs = normalizedFile.stabsSymbols.size(); uint32_t numLocal = normalizedFile.localSymbols.size(); uint32_t numGlobal = normalizedFile.globalSymbols.size(); uint32_t numUndef = normalizedFile.undefinedSymbols.size(); - if (symbolIndex < numLocal) { - sym = &normalizedFile.localSymbols[symbolIndex]; - } else if (symbolIndex < numLocal+numGlobal) { - sym = &normalizedFile.globalSymbols[symbolIndex-numLocal]; - } else if (symbolIndex < numLocal+numGlobal+numUndef) { - sym = &normalizedFile.undefinedSymbols[symbolIndex-numLocal-numGlobal]; + assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); + if (symbolIndex < numStabs+numLocal) { + sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; + } else if (symbolIndex < numStabs+numLocal+numGlobal) { + sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; + } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { + sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- + numGlobal]; } else { return llvm::make_error<GenericError>(Twine("symbol index (") + Twine(symbolIndex) + ") out of range"); } + // Find atom from symbol. 
if ((sym->type & N_TYPE) == N_SECT) { if (sym->sect > normalizedFile.sections.size()) @@ -572,14 +580,14 @@ llvm::Error convertRelocs(const Section §ion, targetOffsetInSect); if (target) { *result = target; - return llvm::Error(); + return llvm::Error::success(); } return llvm::make_error<GenericError>("no atom found for defined symbol"); } else if ((sym->type & N_TYPE) == N_UNDF) { const lld::Atom *target = file.findUndefAtom(sym->name); if (target) { *result = target; - return llvm::Error(); + return llvm::Error::success(); } return llvm::make_error<GenericError>("no undefined atom found for sym"); } else { @@ -676,7 +684,7 @@ llvm::Error convertRelocs(const Section §ion, kind, offsetInAtom, target, addend); } - return llvm::Error(); + return llvm::Error::success(); } bool isDebugInfoSection(const Section §ion) { @@ -685,6 +693,301 @@ bool isDebugInfoSection(const Section §ion) { return section.segmentName.equals("__DWARF"); } +static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { + std::string strName = name.str(); + for (auto *atom : file.defined()) + if (atom->name() == strName) + return atom; + return nullptr; +} + +static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { + char *strCopy = alloc.Allocate<char>(str.size() + 1); + memcpy(strCopy, str.data(), str.size()); + strCopy[str.size()] = '\0'; + return strCopy; +} + +llvm::Error parseStabs(MachOFile &file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + + if (normalizedFile.stabsSymbols.empty()) + return llvm::Error::success(); + + // FIXME: Kill this off when we can move to sane yaml parsing. 
+ std::unique_ptr<BumpPtrAllocator> allocator; + if (copyRefs) + allocator = llvm::make_unique<BumpPtrAllocator>(); + + enum { start, inBeginEnd } state = start; + + const Atom *currentAtom = nullptr; + uint64_t currentAtomAddress = 0; + StabsDebugInfo::StabsList stabsList; + for (const auto &stabSym : normalizedFile.stabsSymbols) { + Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, + stabSym.value, stabSym.name); + switch (state) { + case start: + switch (static_cast<StabType>(stabSym.type)) { + case N_BNSYM: + state = inBeginEnd; + currentAtomAddress = stabSym.value; + Reference::Addend addend; + currentAtom = findAtomCoveringAddress(normalizedFile, file, + currentAtomAddress, addend); + if (addend != 0) + return llvm::make_error<GenericError>( + "Non-zero addend for BNSYM '" + stabSym.name + "' in " + + file.path()); + if (currentAtom) + stab.atom = currentAtom; + else { + // FIXME: ld64 just issues a warning here - should we match that? + return llvm::make_error<GenericError>( + "can't find atom for stabs BNSYM at " + + Twine::utohexstr(stabSym.value) + " in " + file.path()); + } + break; + case N_SO: + case N_OSO: + // Not associated with an atom, just copy. 
+ if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + case N_GSYM: { + auto colonIdx = stabSym.name.find(':'); + if (colonIdx != StringRef::npos) { + StringRef name = stabSym.name.substr(0, colonIdx); + currentAtom = findDefinedAtomByName(file, "_" + name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } else { + currentAtom = findDefinedAtomByName(file, stabSym.name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } + if (stab.atom == nullptr) + return llvm::make_error<GenericError>( + "can't find atom for N_GSYM stabs" + stabSym.name + + " in " + file.path()); + break; + } + case N_FUN: + return llvm::make_error<GenericError>( + "old-style N_FUN stab '" + stabSym.name + "' unsupported"); + default: + return llvm::make_error<GenericError>( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + break; + case inBeginEnd: + stab.atom = currentAtom; + switch (static_cast<StabType>(stabSym.type)) { + case N_ENSYM: + state = start; + currentAtom = nullptr; + break; + case N_FUN: + // Just copy the string. + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + default: + return llvm::make_error<GenericError>( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + } + llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; + stabsList.push_back(stab); + } + + file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList))); + + // FIXME: Kill this off when we fix YAML memory ownership. 
+ file.debugInfo()->setAllocator(std::move(allocator)); + + return llvm::Error::success(); +} + +static llvm::DataExtractor +dataExtractorFromSection(const NormalizedFile &normalizedFile, + const Section &S) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + StringRef SecData(reinterpret_cast<const char*>(S.content.data()), + S.content.size()); + return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, + uint64_t abbrCode) { + uint64_t curCode; + uint32_t offset = 0; + while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) { + // Tag + abbrevData.getULEB128(&offset); + // DW_CHILDREN + abbrevData.getU8(&offset); + // Attributes + while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) + ; + } + return offset; +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static Expected<const char *> +getIndexedString(const NormalizedFile &normalizedFile, + llvm::dwarf::Form form, llvm::DataExtractor infoData, + uint32_t &infoOffset, const Section &stringsSection) { + if (form == llvm::dwarf::DW_FORM_string) + return infoData.getCStr(&infoOffset); + if (form != llvm::dwarf::DW_FORM_strp) + return llvm::make_error<GenericError>( + "string field encoded without DW_FORM_strp"); + uint32_t stringOffset = infoData.getU32(&infoOffset); + llvm::DataExtractor stringsData = + dataExtractorFromSection(normalizedFile, stringsSection); + return stringsData.getCStr(&stringOffset); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. 
+static llvm::Expected<TranslationUnitSource> +readCompUnit(const NormalizedFile &normalizedFile, + const Section &info, + const Section &abbrev, + const Section &strings, + StringRef path) { + // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE + // inspection" code if possible. + uint32_t offset = 0; + llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; + auto infoData = dataExtractorFromSection(normalizedFile, info); + uint32_t length = infoData.getU32(&offset); + if (length == 0xffffffff) { + Format = llvm::dwarf::DwarfFormat::DWARF64; + infoData.getU64(&offset); + } + else if (length > 0xffffff00) + return llvm::make_error<GenericError>("Malformed DWARF in " + path); + + uint16_t version = infoData.getU16(&offset); + + if (version < 2 || version > 4) + return llvm::make_error<GenericError>("Unsupported DWARF version in " + + path); + + infoData.getU32(&offset); // Abbrev offset (should be zero) + uint8_t addrSize = infoData.getU8(&offset); + + uint32_t abbrCode = infoData.getULEB128(&offset); + auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); + uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); + uint64_t tag = abbrevData.getULEB128(&abbrevOffset); + if (tag != llvm::dwarf::DW_TAG_compile_unit) + return llvm::make_error<GenericError>("top level DIE is not a compile unit"); + // DW_CHILDREN + abbrevData.getU8(&abbrevOffset); + uint32_t name; + llvm::dwarf::Form form; + TranslationUnitSource tu; + while ((name = abbrevData.getULEB128(&abbrevOffset)) | + (form = static_cast<llvm::dwarf::Form>( + abbrevData.getULEB128(&abbrevOffset))) && + (name != 0 || form != 0)) { + switch (name) { + case llvm::dwarf::DW_AT_name: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + tu.name = *eName; + else + return eName.takeError(); + break; + } + case llvm::dwarf::DW_AT_comp_dir: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + 
tu.path = *eName; + else + return eName.takeError(); + break; + } + default: + llvm::DWARFFormValue::skipValue(form, infoData, &offset, version, + addrSize, Format); + } + } + return tu; +} + +llvm::Error parseDebugInfo(MachOFile &file, + const NormalizedFile &normalizedFile, bool copyRefs) { + + // Find the interesting debug info sections. + const Section *debugInfo = nullptr; + const Section *debugAbbrev = nullptr; + const Section *debugStrings = nullptr; + + for (auto &s : normalizedFile.sections) { + if (s.segmentName == "__DWARF") { + if (s.sectionName == "__debug_info") + debugInfo = &s; + else if (s.sectionName == "__debug_abbrev") + debugAbbrev = &s; + else if (s.sectionName == "__debug_str") + debugStrings = &s; + } + } + + if (!debugInfo) + return parseStabs(file, normalizedFile, copyRefs); + + if (debugInfo->content.size() == 0) + return llvm::Error::success(); + + if (debugInfo->content.size() < 12) + return llvm::make_error<GenericError>("Malformed __debug_info section in " + + file.path() + ": too small"); + + if (!debugAbbrev) + return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + + file.path()); + + if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, + *debugStrings, file.path())) { + // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML + // memory ownership. 
+ std::unique_ptr<BumpPtrAllocator> allocator; + if (copyRefs) { + allocator = llvm::make_unique<BumpPtrAllocator>(); + tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); + tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); + } + file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); + if (copyRefs) + file.debugInfo()->setAllocator(std::move(allocator)); + } else + return tuOrErr.takeError(); + + return llvm::Error::success(); +} + static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { if (is64) return read64(addr, isBig); @@ -712,7 +1015,7 @@ static llvm::Error processAugmentationString(const uint8_t *augStr, if (augStr[0] == '\0') { len = 1; - return llvm::Error(); + return llvm::Error::success(); } if (augStr[0] != 'z') @@ -762,7 +1065,7 @@ static llvm::Error processAugmentationString(const uint8_t *augStr, cieInfo._augmentationDataLength = offsetInAugmentationData; len = idx + 1; - return llvm::Error(); + return llvm::Error::success(); } static llvm::Error processCIE(const NormalizedFile &normalizedFile, @@ -853,7 +1156,7 @@ static llvm::Error processCIE(const NormalizedFile &normalizedFile, const MachODefinedAtom *func = nullptr; Reference::Addend addend; func = findAtomCoveringAddress(normalizedFile, file, funcAddress, - &addend); + addend); atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), handler.unwindRefToPersonalityFunctionKind(), PersonalityFunctionField, func, addend); @@ -867,7 +1170,7 @@ static llvm::Error processCIE(const NormalizedFile &normalizedFile, cieInfos[atom] = std::move(cieInfo); - return llvm::Error(); + return llvm::Error::success(); } static llvm::Error processFDE(const NormalizedFile &normalizedFile, @@ -936,7 +1239,7 @@ static llvm::Error processFDE(const NormalizedFile &normalizedFile, } Reference::Addend addend; auto *target = findAtomCoveringAddress(normalizedFile, file, - targetAddress, &addend); + targetAddress, addend); 
atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), refKind, refAddress, target, addend); @@ -1011,7 +1314,7 @@ static llvm::Error processFDE(const NormalizedFile &normalizedFile, } } - return llvm::Error(); + return llvm::Error::success(); } llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, @@ -1028,9 +1331,9 @@ llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, // No __eh_frame so nothing to do. if (!ehFrameSection) - return llvm::Error(); + return llvm::Error::success(); - llvm::Error ehFrameErr; + llvm::Error ehFrameErr = llvm::Error::success(); CIEInfoMap cieInfos; file.eachAtomInSection(*ehFrameSection, @@ -1092,10 +1395,9 @@ llvm::Error parseObjCImageInfo(const Section §, file.setSwiftVersion((flags >> 8) & 0xFF); - return llvm::Error(); + return llvm::Error::success(); } - /// Converts normalized mach-o file into an lld::File and lld::Atoms. llvm::Expected<std::unique_ptr<lld::File>> objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, @@ -1135,11 +1437,11 @@ normalizedObjectToAtoms(MachOFile *file, // Create atoms from each section. for (auto § : normalizedFile.sections) { - DEBUG(llvm::dbgs() << "Creating atoms: "; sect.dump()); + + // If this is a debug-info section parse it specially. if (isDebugInfoSection(sect)) continue; - // If the file contains an objc_image_info struct, then we should parse the // ObjC flags and Swift version. 
if (isObjCImageInfo(sect)) { @@ -1248,7 +1550,11 @@ normalizedObjectToAtoms(MachOFile *file, for (const DefinedAtom* defAtom : file->defined()) { reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); } - return llvm::Error(); + + if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) + return err; + + return llvm::Error::success(); } llvm::Error @@ -1279,7 +1585,7 @@ normalizedDylibToAtoms(MachODylibFile *file, if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) file->addReExportedDylib(dep.path); } - return llvm::Error(); + return llvm::Error::success(); } void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, @@ -1324,14 +1630,6 @@ normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, } } -#ifndef NDEBUG -void Section::dump(llvm::raw_ostream &OS) const { - OS << "Section (\"" << segmentName << ", " << sectionName << "\""; - OS << ", addr: " << llvm::format_hex(address, 16, true); - OS << ", size: " << llvm::format_hex(content.size(), 8, true) << ")\n"; -} -#endif - } // namespace normalized } // namespace mach_o } // namespace lld diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp index 66be77173983..218170965eca 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -234,6 +234,8 @@ struct ScalarBitSetTraits<SectionAttr> { llvm::MachO::S_ATTR_EXT_RELOC); io.bitSetCase(value, "S_ATTR_LOC_RELOC", llvm::MachO::S_ATTR_LOC_RELOC); + io.bitSetCase(value, "S_ATTR_DEBUG", + llvm::MachO::S_ATTR_DEBUG); } }; diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp index ba24b3fecdf4..4712d8ca969c 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp +++ 
b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp @@ -103,7 +103,7 @@ public: // Add the image info. mergedFile.addAtom(*getImageInfo()); - return llvm::Error(); + return llvm::Error::success(); } private: diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp index cd5367146658..ff559d70eabe 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp @@ -66,7 +66,7 @@ public: } // Exit early if no shims needed. if (_targetToShim.empty()) - return llvm::Error(); + return llvm::Error::success(); // Sort shim atoms so the layout order is stable. std::vector<const DefinedAtom *> shims; @@ -83,7 +83,7 @@ public: for (const DefinedAtom *shim : shims) mergedFile.addAtom(*shim); - return llvm::Error(); + return llvm::Error::success(); } private: diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp index d53b78b24d14..19e2bc592f5c 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp @@ -218,7 +218,7 @@ public: llvm::Error perform(SimpleFile &mergedFile) override { // Skip this pass if output format uses text relocations instead of stubs. if (!this->noTextRelocs()) - return llvm::Error(); + return llvm::Error::success(); // Scan all references in all atoms. for (const DefinedAtom *atom : mergedFile.defined()) { @@ -245,7 +245,7 @@ public: // Exit early if no stubs needed. if (_targetToUses.empty()) - return llvm::Error(); + return llvm::Error::success(); // First add help-common and GOT slots used by lazy binding. 
SimpleDefinedAtom *helperCommonAtom = @@ -323,7 +323,7 @@ public: lazyOffset += target->name().size() + 12; } - return llvm::Error(); + return llvm::Error::success(); } private: diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp index 7a8496c20a4e..e362e507ebf2 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp @@ -107,7 +107,7 @@ private: for (const TLVPEntryAtom *slot : entries) mergedFile.addAtom(*slot); - return llvm::Error(); + return llvm::Error::success(); } const DefinedAtom *makeTLVPEntry(const Atom *target) { diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp index ee2a9ec10883..59ca43079a6d 100644 --- a/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp +++ b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp @@ -249,6 +249,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(ArchMember) LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *) // Always write DefinedAtoms content bytes as a flow sequence. 
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(ImplicitHex8) + // for compatibility with gcc-4.7 in C++11 mode, add extra namespace namespace llvm { namespace yaml { @@ -567,10 +568,10 @@ template <> struct DocumentListTraits<std::vector<const lld::File *> > { // YAML conversion for const lld::File* template <> struct MappingTraits<const lld::File *> { - class NormArchiveFile : public lld::ArchiveLibraryFile { public: - NormArchiveFile(IO &io) : ArchiveLibraryFile(""), _path() {} + NormArchiveFile(IO &io) : ArchiveLibraryFile("") {} + NormArchiveFile(IO &io, const lld::File *file) : ArchiveLibraryFile(file->path()), _path(file->path()) { // If we want to support writing archives, this constructor would @@ -627,6 +628,7 @@ template <> struct MappingTraits<const lld::File *> { _undefinedAtomsRef(_undefinedAtoms._atoms), _sharedLibraryAtomsRef(_sharedLibraryAtoms._atoms), _absoluteAtomsRef(_absoluteAtoms._atoms) {} + NormalizedFile(IO &io, const lld::File *file) : File(file->path(), kindNormalizedObject), _io(io), _rnb(new RefNameBuilder(*file)), _path(file->path()), @@ -673,7 +675,7 @@ template <> struct MappingTraits<const lld::File *> { } IO &_io; - std::unique_ptr<RefNameBuilder> _rnb; + std::unique_ptr<RefNameBuilder> _rnb; StringRef _path; AtomList<lld::DefinedAtom> _definedAtoms; AtomList<lld::UndefinedAtom> _undefinedAtoms; @@ -732,13 +734,12 @@ template <> struct MappingTraits<const lld::File *> { // YAML conversion for const lld::Reference* template <> struct MappingTraits<const lld::Reference *> { - class NormalizedReference : public lld::Reference { public: NormalizedReference(IO &io) : lld::Reference(lld::Reference::KindNamespace::all, lld::Reference::KindArch::all, 0), - _target(nullptr), _targetName(), _offset(0), _addend(0), _tag(0) {} + _target(nullptr), _offset(0), _addend(0), _tag(0) {} NormalizedReference(IO &io, const lld::Reference *ref) : lld::Reference(ref->kindNamespace(), ref->kindArch(), @@ -768,6 +769,7 @@ template <> struct MappingTraits<const 
lld::Reference *> { setKindValue(_mappedKind.value); return this; } + void bind(const RefNameResolver &); static StringRef targetName(IO &io, const lld::Reference *ref); @@ -804,13 +806,13 @@ template <> struct MappingTraits<const lld::DefinedAtom *> { class NormalizedAtom : public lld::DefinedAtom { public: NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _name(), _refName(), _contentType(), - _alignment(1), _content(), _references() { + : _file(fileFromContext(io)), _contentType(), _alignment(1) { static uint32_t ordinalCounter = 1; _ordinal = ordinalCounter++; } + NormalizedAtom(IO &io, const lld::DefinedAtom *atom) - : _file(fileFromContext(io)), _name(atom->name()), _refName(), + : _file(fileFromContext(io)), _name(atom->name()), _scope(atom->scope()), _interpose(atom->interposable()), _merge(atom->merge()), _contentType(atom->contentType()), _alignment(atom->alignment()), _sectionChoice(atom->sectionChoice()), @@ -991,11 +993,10 @@ template <> struct MappingTraits<lld::DefinedAtom *> { // YAML conversion for const lld::UndefinedAtom* template <> struct MappingTraits<const lld::UndefinedAtom *> { - class NormalizedAtom : public lld::UndefinedAtom { public: NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _name(), _canBeNull(canBeNullNever) {} + : _file(fileFromContext(io)), _canBeNull(canBeNullNever) {} NormalizedAtom(IO &io, const lld::UndefinedAtom *atom) : _file(fileFromContext(io)), _name(atom->name()), @@ -1059,8 +1060,9 @@ template <> struct MappingTraits<const lld::SharedLibraryAtom *> { class NormalizedAtom : public lld::SharedLibraryAtom { public: NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _name(), _loadName(), _canBeNull(false), + : _file(fileFromContext(io)), _canBeNull(false), _type(Type::Unknown), _size(0) {} + NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom) : _file(fileFromContext(io)), _name(atom->name()), _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()), @@ -1133,11 +1135,11 @@ template <> 
struct MappingTraits<lld::SharedLibraryAtom *> { // YAML conversion for const lld::AbsoluteAtom* template <> struct MappingTraits<const lld::AbsoluteAtom *> { - class NormalizedAtom : public lld::AbsoluteAtom { public: NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _name(), _scope(), _value(0) {} + : _file(fileFromContext(io)), _scope(), _value(0) {} + NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom) : _file(fileFromContext(io)), _name(atom->name()), _scope(atom->scope()), _value(atom->value()) {} @@ -1158,6 +1160,7 @@ template <> struct MappingTraits<const lld::AbsoluteAtom *> { << ", " << _name.size() << ")\n"); return this; } + // Extract current File object from YAML I/O parsing context const lld::File &fileFromContext(IO &io) { YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); @@ -1309,7 +1312,7 @@ public: const lld::File *fileRef = &file; yout << fileRef; - return llvm::Error(); + return llvm::Error::success(); } private: diff --git a/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt index 5951da6496c5..0c946a3f8ce0 100644 --- a/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt +++ b/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt @@ -1,4 +1,4 @@ -add_llvm_executable(lld +add_lld_tool(lld lld.cpp ) @@ -17,8 +17,5 @@ if(NOT LLD_SYMLINKS_TO_CREATE) endif() foreach(link ${LLD_SYMLINKS_TO_CREATE}) - add_llvm_tool_symlink(${link} lld ALWAYS_GENERATE) - # Always generate install targets - llvm_install_symlink(${link} lld ALWAYS_GENERATE) + add_lld_symlink(${link} lld) endforeach() - diff --git a/contrib/llvm/tools/lld/tools/lld/lld.cpp b/contrib/llvm/tools/lld/tools/lld/lld.cpp index 4f265a7b84b8..f42ccfe3d36a 100644 --- a/contrib/llvm/tools/lld/tools/lld/lld.cpp +++ b/contrib/llvm/tools/lld/tools/lld/lld.cpp @@ -43,9 +43,7 @@ LLVM_ATTRIBUTE_NORETURN static void die(const Twine &S) { static Flavor getFlavor(StringRef S) { return StringSwitch<Flavor>(S) - .Case("ld", Gnu) - 
.Case("ld.lld", Gnu) - .Case("gnu", Gnu) + .Cases("ld", "ld.lld", "gnu", Gnu) .Case("link", WinLink) .Case("darwin", Darwin) .Default(Invalid); @@ -101,5 +99,5 @@ int main(int Argc, const char **Argv) { llvm_shutdown_obj Shutdown; std::vector<const char *> Args(Argv, Argv + Argc); - return !elf::link(Args); + return !elf::link(Args, true); } |