diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2013-12-22 00:04:03 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2013-12-22 00:04:03 +0000 |
commit | f8af5cf600354830d4ccf59732403f0f073eccb9 (patch) | |
tree | 2ba0398b4c42ad4f55561327538044fd2c925a8b /tools/llvm-symbolizer | |
parent | 59d6cff90eecf31cb3dd860c4e786674cfdd42eb (diff) | |
download | src-f8af5cf600354830d4ccf59732403f0f073eccb9.tar.gz src-f8af5cf600354830d4ccf59732403f0f073eccb9.zip |
Vendor import of llvm release_34 branch r197841 (effectively, 3.4 RC3):vendor/llvm/llvm-release_34-r197841
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=259698
svn path=/vendor/llvm/llvm-release_34-r197841/; revision=259700; tag=vendor/llvm/llvm-release_34-r197841
Diffstat (limited to 'tools/llvm-symbolizer')
-rw-r--r-- | tools/llvm-symbolizer/LLVMSymbolize.cpp | 254 | ||||
-rw-r--r-- | tools/llvm-symbolizer/LLVMSymbolize.h | 40 | ||||
-rw-r--r-- | tools/llvm-symbolizer/llvm-symbolizer.cpp | 6 |
3 files changed, 243 insertions, 57 deletions
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp index 29d91a0e92a3..320ab3fcbcfe 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.cpp +++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp @@ -13,11 +13,17 @@ #include "LLVMSymbolize.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" #include "llvm/Object/MachO.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include <sstream> +#include <stdlib.h> namespace llvm { namespace symbolize { @@ -63,15 +69,23 @@ ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) SymbolAddress == UnknownAddressOrSize) continue; uint64_t SymbolSize; - if (error(si->getSize(SymbolSize)) || SymbolSize == UnknownAddressOrSize) + // Getting symbol size is linear for Mach-O files, so assume that symbol + // occupies the memory range up to the following symbol. + if (isa<MachOObjectFile>(Obj)) + SymbolSize = 0; + else if (error(si->getSize(SymbolSize)) || + SymbolSize == UnknownAddressOrSize) continue; StringRef SymbolName; if (error(si->getName(SymbolName))) continue; + // Mach-O symbol table names have leading underscore, skip it. + if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') + SymbolName = SymbolName.drop_front(); // FIXME: If a function has alias, there are two entries in symbol table // with same address size. Make sure we choose the correct one. SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; - SymbolDesc SD = { SymbolAddress, SymbolAddress + SymbolSize }; + SymbolDesc SD = { SymbolAddress, SymbolSize }; M.insert(std::make_pair(SD, SymbolName)); } } @@ -80,15 +94,18 @@ bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size) const { const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects; - SymbolDesc SD = { Address, Address + 1 }; - SymbolMapTy::const_iterator it = M.find(SD); - if (it == M.end()) + if (M.empty()) return false; - if (Address < it->first.Addr || Address >= it->first.AddrEnd) + SymbolDesc SD = { Address, Address }; + SymbolMapTy::const_iterator it = M.upper_bound(SD); + if (it == M.begin()) + return false; + --it; + if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address) return false; Name = it->second.str(); Addr = it->first.Addr; - Size = it->first.AddrEnd - it->first.Addr; + Size = it->first.Size; return true; } @@ -178,8 +195,8 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, uint64_t Size = 0; if (Opts.UseSymbolTable) { if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { - if (Info->symbolizeData(ModuleOffset, Name, Start, Size)) - DemangleName(Name); + if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) + Name = DemangleGlobalName(Name); } } std::stringstream ss; @@ -189,23 +206,12 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, void LLVMSymbolizer::flush() { DeleteContainerSeconds(Modules); + DeleteContainerPointers(ParsedBinariesAndObjects); + BinaryForPath.clear(); + ObjectFileForArch.clear(); } -// Returns true if the object endianness is known. -static bool getObjectEndianness(const ObjectFile *Obj, bool &IsLittleEndian) { - // FIXME: Implement this when libLLVMObject allows to do it easily. - IsLittleEndian = true; - return true; -} - -static ObjectFile *getObjectFile(const std::string &Path) { - OwningPtr<MemoryBuffer> Buff; - if (error_code ec = MemoryBuffer::getFile(Path, Buff)) - error(ec); - return ObjectFile::createObjectFile(Buff.take()); -} - -static std::string getDarwinDWARFResourceForModule(const std::string &Path) { +static std::string getDarwinDWARFResourceForPath(const std::string &Path) { StringRef Basename = sys::path::filename(Path); const std::string &DSymDirectory = Path + ".dSYM"; SmallString<16> ResourceName = StringRef(DSymDirectory); @@ -214,36 +220,176 @@ static std::string getDarwinDWARFResourceForModule(const std::string &Path) { return ResourceName.str(); } +static bool checkFileCRC(StringRef Path, uint32_t CRCHash) { + OwningPtr<MemoryBuffer> MB; + if (MemoryBuffer::getFileOrSTDIN(Path, MB)) + return false; + return !zlib::isAvailable() || CRCHash == zlib::crc32(MB->getBuffer()); +} + +static bool findDebugBinary(const std::string &OrigPath, + const std::string &DebuglinkName, uint32_t CRCHash, + std::string &Result) { + std::string OrigRealPath = OrigPath; +#if defined(HAVE_REALPATH) + if (char *RP = realpath(OrigPath.c_str(), NULL)) { + OrigRealPath = RP; + free(RP); + } +#endif + SmallString<16> OrigDir(OrigRealPath); + llvm::sys::path::remove_filename(OrigDir); + SmallString<16> DebugPath = OrigDir; + // Try /path/to/original_binary/debuglink_name + llvm::sys::path::append(DebugPath, DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = DebugPath.str(); + return true; + } + // Try /path/to/original_binary/.debug/debuglink_name + DebugPath = OrigRealPath; + llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = DebugPath.str(); + return true; + } + // Try /usr/lib/debug/path/to/original_binary/debuglink_name + DebugPath = "/usr/lib/debug"; + llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), + DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = DebugPath.str(); + return true; + } + return false; +} + +static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName, + uint32_t &CRCHash) { + const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin); + if (!Obj) + return false; + error_code EC; + for (section_iterator I = Obj->begin_sections(), E = Obj->end_sections(); + I != E; I.increment(EC)) { + StringRef Name; + I->getName(Name); + Name = Name.substr(Name.find_first_not_of("._")); + if (Name == "gnu_debuglink") { + StringRef Data; + I->getContents(Data); + DataExtractor DE(Data, Obj->isLittleEndian(), 0); + uint32_t Offset = 0; + if (const char *DebugNameStr = DE.getCStr(&Offset)) { + // 4-byte align the offset. + Offset = (Offset + 3) & ~0x3; + if (DE.isValidOffsetForDataOfSize(Offset, 4)) { + DebugName = DebugNameStr; + CRCHash = DE.getU32(&Offset); + return true; + } + } + break; + } + } + return false; +} + +LLVMSymbolizer::BinaryPair +LLVMSymbolizer::getOrCreateBinary(const std::string &Path) { + BinaryMapTy::iterator I = BinaryForPath.find(Path); + if (I != BinaryForPath.end()) + return I->second; + Binary *Bin = 0; + Binary *DbgBin = 0; + OwningPtr<Binary> ParsedBinary; + OwningPtr<Binary> ParsedDbgBinary; + if (!error(createBinary(Path, ParsedBinary))) { + // Check if it's a universal binary. + Bin = ParsedBinary.take(); + ParsedBinariesAndObjects.push_back(Bin); + if (Bin->isMachO() || Bin->isMachOUniversalBinary()) { + // On Darwin we may find DWARF in separate object file in + // resource directory. + const std::string &ResourcePath = + getDarwinDWARFResourceForPath(Path); + bool ResourceFileExists = false; + if (!sys::fs::exists(ResourcePath, ResourceFileExists) && + ResourceFileExists && + !error(createBinary(ResourcePath, ParsedDbgBinary))) { + DbgBin = ParsedDbgBinary.take(); + ParsedBinariesAndObjects.push_back(DbgBin); + } + } + // Try to locate the debug binary using .gnu_debuglink section. + if (DbgBin == 0) { + std::string DebuglinkName; + uint32_t CRCHash; + std::string DebugBinaryPath; + if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) && + findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath) && + !error(createBinary(DebugBinaryPath, ParsedDbgBinary))) { + DbgBin = ParsedDbgBinary.take(); + ParsedBinariesAndObjects.push_back(DbgBin); + } + } + } + if (DbgBin == 0) + DbgBin = Bin; + BinaryPair Res = std::make_pair(Bin, DbgBin); + BinaryForPath[Path] = Res; + return Res; +} + +ObjectFile * +LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) { + if (Bin == 0) + return 0; + ObjectFile *Res = 0; + if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) { + ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find( + std::make_pair(UB, ArchName)); + if (I != ObjectFileForArch.end()) + return I->second; + OwningPtr<ObjectFile> ParsedObj; + if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) { + Res = ParsedObj.take(); + ParsedBinariesAndObjects.push_back(Res); + } + ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; + } else if (Bin->isObject()) { + Res = cast<ObjectFile>(Bin); + } + return Res; +} + ModuleInfo * LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { ModuleMapTy::iterator I = Modules.find(ModuleName); if (I != Modules.end()) return I->second; + std::string BinaryName = ModuleName; + std::string ArchName = Opts.DefaultArch; + size_t ColonPos = ModuleName.find_last_of(':'); + // Verify that substring after colon form a valid arch name. + if (ColonPos != std::string::npos) { + std::string ArchStr = ModuleName.substr(ColonPos + 1); + if (Triple(ArchStr).getArch() != Triple::UnknownArch) { + BinaryName = ModuleName.substr(0, ColonPos); + ArchName = ArchStr; + } + } + BinaryPair Binaries = getOrCreateBinary(BinaryName); + ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName); + ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName); - ObjectFile *Obj = getObjectFile(ModuleName); if (Obj == 0) { - // Module name doesn't point to a valid object file. + // Failed to find valid object file. Modules.insert(make_pair(ModuleName, (ModuleInfo *)0)); return 0; } - - DIContext *Context = 0; - bool IsLittleEndian; - if (getObjectEndianness(Obj, IsLittleEndian)) { - // On Darwin we may find DWARF in separate object file in - // resource directory. - ObjectFile *DbgObj = Obj; - if (isa<MachOObjectFile>(Obj)) { - const std::string &ResourceName = - getDarwinDWARFResourceForModule(ModuleName); - ObjectFile *ResourceObj = getObjectFile(ResourceName); - if (ResourceObj != 0) - DbgObj = ResourceObj; - } - Context = DIContext::getDWARFContext(DbgObj); - assert(Context); - } - + DIContext *Context = DIContext::getDWARFContext(DbgObj); + assert(Context); ModuleInfo *Info = new ModuleInfo(Obj, Context); Modules.insert(make_pair(ModuleName, Info)); return Info; @@ -258,7 +404,8 @@ std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const { std::string FunctionName = LineInfo.getFunctionName(); if (FunctionName == kDILineInfoBadString) FunctionName = kBadString; - DemangleName(FunctionName); + else if (Opts.Demangle) + FunctionName = DemangleName(FunctionName); Result << FunctionName << "\n"; } std::string Filename = LineInfo.getFileName(); @@ -275,18 +422,25 @@ extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, size_t *length, int *status); #endif -void LLVMSymbolizer::DemangleName(std::string &Name) const { +std::string LLVMSymbolizer::DemangleName(const std::string &Name) { #if !defined(_MSC_VER) - if (!Opts.Demangle) - return; int status = 0; char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status); if (status != 0) - return; - Name = DemangledName; + return Name; + std::string Result = DemangledName; free(DemangledName); + return Result; +#else + return Name; #endif } +std::string LLVMSymbolizer::DemangleGlobalName(const std::string &Name) { + // We can spoil names of globals with C linkage, so use an heuristic + // approach to check if the name should be demangled. + return (Name.substr(0, 2) == "_Z") ? DemangleName(Name) : Name; +} + } // namespace symbolize } // namespace llvm diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h index 0733dfbbc52e..eb2666a542c9 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.h +++ b/tools/llvm-symbolizer/LLVMSymbolize.h @@ -14,7 +14,9 @@ #define LLVM_SYMBOLIZE_H #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/MemoryBuffer.h" #include <map> @@ -35,14 +37,20 @@ public: bool PrintFunctions : 1; bool PrintInlining : 1; bool Demangle : 1; + std::string DefaultArch; Options(bool UseSymbolTable = true, bool PrintFunctions = true, - bool PrintInlining = true, bool Demangle = true) + bool PrintInlining = true, bool Demangle = true, + std::string DefaultArch = "") : UseSymbolTable(UseSymbolTable), PrintFunctions(PrintFunctions), - PrintInlining(PrintInlining), Demangle(Demangle) { + PrintInlining(PrintInlining), Demangle(Demangle), + DefaultArch(DefaultArch) { } }; LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {} + ~LLVMSymbolizer() { + flush(); + } // Returns the result of symbolization for module name/offset as // a string (possibly containing newlines). @@ -51,13 +59,31 @@ public: std::string symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); void flush(); + static std::string DemangleName(const std::string &Name); private: + typedef std::pair<Binary*, Binary*> BinaryPair; + ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName); + /// \brief Returns pair of pointers to binary and debug binary. + BinaryPair getOrCreateBinary(const std::string &Path); + /// \brief Returns a parsed object file for a given architecture in a + /// universal binary (or the binary itself if it is an object file). + ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName); + std::string printDILineInfo(DILineInfo LineInfo) const; - void DemangleName(std::string &Name) const; + static std::string DemangleGlobalName(const std::string &Name); + // Owns all the parsed binaries and object files. + SmallVector<Binary*, 4> ParsedBinariesAndObjects; + // Owns module info objects. typedef std::map<std::string, ModuleInfo *> ModuleMapTy; ModuleMapTy Modules; + typedef std::map<std::string, BinaryPair> BinaryMapTy; + BinaryMapTy BinaryForPath; + typedef std::map<std::pair<MachOUniversalBinary *, std::string>, ObjectFile *> + ObjectFileForArchMapTy; + ObjectFileForArchMapTy ObjectFileForArch; + Options Opts; static const char kBadString[]; }; @@ -77,14 +103,16 @@ private: bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size) const; - OwningPtr<ObjectFile> Module; + ObjectFile *Module; OwningPtr<DIContext> DebugInfoContext; struct SymbolDesc { uint64_t Addr; - uint64_t AddrEnd; + // If size is 0, assume that symbol occupies the whole memory range up to + // the following symbol. + uint64_t Size; friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) { - return s1.AddrEnd <= s2.Addr; + return s1.Addr < s2.Addr; } }; typedef std::map<SymbolDesc, StringRef> SymbolMapTy; diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp index 0cafffaf7126..c32e9494ea35 100644 --- a/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -47,6 +47,10 @@ ClPrintInlining("inlining", cl::init(true), static cl::opt<bool> ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names")); +static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""), + cl::desc("Default architecture " + "(for multi-arch objects)")); + static bool parseCommand(bool &IsData, std::string &ModuleName, uint64_t &ModuleOffset) { const char *kDataCmd = "DATA "; @@ -102,7 +106,7 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm symbolizer for compiler-rt\n"); LLVMSymbolizer::Options Opts(ClUseSymbolTable, ClPrintFunctions, - ClPrintInlining, ClDemangle); + ClPrintInlining, ClDemangle, ClDefaultArch); LLVMSymbolizer Symbolizer(Opts); bool IsData = false; |