diff options
Diffstat (limited to 'lib/DebugInfo/PDB/Raw')
22 files changed, 3279 insertions, 0 deletions
diff --git a/lib/DebugInfo/PDB/Raw/DbiStream.cpp b/lib/DebugInfo/PDB/Raw/DbiStream.cpp new file mode 100644 index 000000000000..3c0586c728f9 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/DbiStream.cpp @@ -0,0 +1,462 @@ +//===- DbiStream.cpp - PDB Dbi Stream (Stream 3) Access -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" + +#include "llvm/DebugInfo/CodeView/StreamArray.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/ModInfo.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" +#include "llvm/Object/COFF.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; +using namespace llvm::support; + +namespace { +// Some of the values are stored in bitfields. Since this needs to be portable +// across compilers and architectures (big / little endian in particular) we +// can't use the actual structures below, but must instead do the shifting +// and masking ourselves. The struct definitions are provided for reference. + +// struct DbiFlags { +// uint16_t IncrementalLinking : 1; // True if linked incrementally +// uint16_t IsStripped : 1; // True if private symbols were stripped. +// uint16_t HasCTypes : 1; // True if linked with /debug:ctypes. +// uint16_t Reserved : 13; +//}; +const uint16_t FlagIncrementalMask = 0x0001; +const uint16_t FlagStrippedMask = 0x0002; +const uint16_t FlagHasCTypesMask = 0x0004; + +// struct DbiBuildNo { +// uint16_t MinorVersion : 8; +// uint16_t MajorVersion : 7; +// uint16_t NewVersionFormat : 1; +//}; +const uint16_t BuildMinorMask = 0x00FF; +const uint16_t BuildMinorShift = 0; + +const uint16_t BuildMajorMask = 0x7F00; +const uint16_t BuildMajorShift = 8; + +struct FileInfoSubstreamHeader { + ulittle16_t NumModules; // Total # of modules, should match number of + // records in the ModuleInfo substream. + ulittle16_t NumSourceFiles; // Total # of source files. This value is not + // accurate because PDB actually supports more + // than 64k source files, so we ignore it and + // compute the value from other stream fields. +}; +} + +template <typename ContribType> +static Error loadSectionContribs(FixedStreamArray<ContribType> &Output, + StreamReader &Reader) { + if (Reader.bytesRemaining() % sizeof(ContribType) != 0) + return make_error<RawError>( + raw_error_code::corrupt_file, + "Invalid number of bytes of section contributions"); + + uint32_t Count = Reader.bytesRemaining() / sizeof(ContribType); + if (auto EC = Reader.readArray(Output, Count)) + return EC; + return Error::success(); +} + +DbiStream::DbiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream) + : Pdb(File), Stream(std::move(Stream)), Header(nullptr) { + static_assert(sizeof(HeaderInfo) == 64, "Invalid HeaderInfo size!"); +} + +DbiStream::~DbiStream() {} + +Error DbiStream::reload() { + StreamReader Reader(*Stream); + + if (Stream->getLength() < sizeof(HeaderInfo)) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Stream does not contain a header."); + if (auto EC = Reader.readObject(Header)) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Stream does not contain a header."); + + if (Header->VersionSignature != -1) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid DBI version signature."); + + // Require at least version 7, which should be present in all PDBs + // produced in the last decade and allows us to avoid having to + // special case all kinds of complicated arcane formats. + if (Header->VersionHeader < PdbDbiV70) + return make_error<RawError>(raw_error_code::feature_unsupported, + "Unsupported DBI version."); + + auto IS = Pdb.getPDBInfoStream(); + if (!IS) + return IS.takeError(); + + if (Header->Age != IS->getAge()) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Age does not match PDB Age."); + + if (Stream->getLength() != + sizeof(HeaderInfo) + Header->ModiSubstreamSize + + Header->SecContrSubstreamSize + Header->SectionMapSize + + Header->FileInfoSize + Header->TypeServerSize + + Header->OptionalDbgHdrSize + Header->ECSubstreamSize) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Length does not equal sum of substreams."); + + // Only certain substreams are guaranteed to be aligned. Validate + // them here. + if (Header->ModiSubstreamSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI MODI substream not aligned."); + if (Header->SecContrSubstreamSize % sizeof(uint32_t) != 0) + return make_error<RawError>( + raw_error_code::corrupt_file, + "DBI section contribution substream not aligned."); + if (Header->SectionMapSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI section map substream not aligned."); + if (Header->FileInfoSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI file info substream not aligned."); + if (Header->TypeServerSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI type server substream not aligned."); + + // Since each ModInfo in the stream is a variable length, we have to iterate + // them to know how many there actually are. + VarStreamArray<ModInfo> ModInfoArray; + if (auto EC = Reader.readArray(ModInfoArray, Header->ModiSubstreamSize)) + return EC; + for (auto &Info : ModInfoArray) { + ModuleInfos.emplace_back(Info); + } + + if (auto EC = Reader.readStreamRef(SecContrSubstream, + Header->SecContrSubstreamSize)) + return EC; + if (auto EC = Reader.readStreamRef(SecMapSubstream, Header->SectionMapSize)) + return EC; + if (auto EC = Reader.readStreamRef(FileInfoSubstream, Header->FileInfoSize)) + return EC; + if (auto EC = + Reader.readStreamRef(TypeServerMapSubstream, Header->TypeServerSize)) + return EC; + if (auto EC = Reader.readStreamRef(ECSubstream, Header->ECSubstreamSize)) + return EC; + if (auto EC = Reader.readArray(DbgStreams, Header->OptionalDbgHdrSize / + sizeof(ulittle16_t))) + return EC; + + if (auto EC = initializeSectionContributionData()) + return EC; + if (auto EC = initializeSectionHeadersData()) + return EC; + if (auto EC = initializeSectionMapData()) + return EC; + if (auto EC = initializeFileInfo()) + return EC; + if (auto EC = initializeFpoRecords()) + return EC; + + if (Reader.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Found unexpected bytes in DBI Stream."); + + if (ECSubstream.getLength() > 0) { + StreamReader ECReader(ECSubstream); + if (auto EC = ECNames.load(ECReader)) + return EC; + } + + return Error::success(); +} + +PdbRaw_DbiVer DbiStream::getDbiVersion() const { + uint32_t Value = Header->VersionHeader; + return static_cast<PdbRaw_DbiVer>(Value); +} + +uint32_t DbiStream::getAge() const { return Header->Age; } + +uint16_t DbiStream::getPublicSymbolStreamIndex() const { + return Header->PublicSymbolStreamIndex; +} + +uint16_t DbiStream::getGlobalSymbolStreamIndex() const { + return Header->GlobalSymbolStreamIndex; +} + +uint16_t DbiStream::getFlags() const { return Header->Flags; } + +bool DbiStream::isIncrementallyLinked() const { + return (Header->Flags & FlagIncrementalMask) != 0; +} + +bool DbiStream::hasCTypes() const { + return (Header->Flags & FlagHasCTypesMask) != 0; +} + +bool DbiStream::isStripped() const { + return (Header->Flags & FlagStrippedMask) != 0; +} + +uint16_t DbiStream::getBuildNumber() const { return Header->BuildNumber; } + +uint16_t DbiStream::getBuildMajorVersion() const { + return (Header->BuildNumber & BuildMajorMask) >> BuildMajorShift; +} + +uint16_t DbiStream::getBuildMinorVersion() const { + return (Header->BuildNumber & BuildMinorMask) >> BuildMinorShift; +} + +uint16_t DbiStream::getPdbDllRbld() const { return Header->PdbDllRbld; } + +uint32_t DbiStream::getPdbDllVersion() const { return Header->PdbDllVersion; } + +uint32_t DbiStream::getSymRecordStreamIndex() const { + return Header->SymRecordStreamIndex; +} + +PDB_Machine DbiStream::getMachineType() const { + uint16_t Machine = Header->MachineType; + return static_cast<PDB_Machine>(Machine); +} + +codeview::FixedStreamArray<object::coff_section> +DbiStream::getSectionHeaders() { + return SectionHeaders; +} + +codeview::FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() { + return FpoRecords; +} + +ArrayRef<ModuleInfoEx> DbiStream::modules() const { return ModuleInfos; } +codeview::FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const { + return SectionMap; +} + +void llvm::pdb::DbiStream::visitSectionContributions( + ISectionContribVisitor &Visitor) const { + if (SectionContribVersion == DbiSecContribVer60) { + for (auto &SC : SectionContribs) + Visitor.visit(SC); + } else if (SectionContribVersion == DbiSecContribV2) { + for (auto &SC : SectionContribs2) + Visitor.visit(SC); + } +} + +Error DbiStream::initializeSectionContributionData() { + if (SecContrSubstream.getLength() == 0) + return Error::success(); + + StreamReader SCReader(SecContrSubstream); + if (auto EC = SCReader.readEnum(SectionContribVersion)) + return EC; + + if (SectionContribVersion == DbiSecContribVer60) + return loadSectionContribs<SectionContrib>(SectionContribs, SCReader); + if (SectionContribVersion == DbiSecContribV2) + return loadSectionContribs<SectionContrib2>(SectionContribs2, SCReader); + + return make_error<RawError>(raw_error_code::feature_unsupported, + "Unsupported DBI Section Contribution version"); +} + +// Initializes this->SectionHeaders. +Error DbiStream::initializeSectionHeadersData() { + if (DbgStreams.size() == 0) + return Error::success(); + + uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr); + if (StreamNum >= Pdb.getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + + auto SHS = MappedBlockStream::createIndexedStream(StreamNum, Pdb); + if (!SHS) + return SHS.takeError(); + + size_t StreamLen = (*SHS)->getLength(); + if (StreamLen % sizeof(object::coff_section)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted section header stream."); + + size_t NumSections = StreamLen / sizeof(object::coff_section); + codeview::StreamReader Reader(**SHS); + if (auto EC = Reader.readArray(SectionHeaders, NumSections)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a bitmap."); + + SectionHeaderStream = std::move(*SHS); + return Error::success(); +} + +// Initializes this->Fpos. +Error DbiStream::initializeFpoRecords() { + if (DbgStreams.size() == 0) + return Error::success(); + + uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::NewFPO); + + // This means there is no FPO data. + if (StreamNum == InvalidStreamIndex) + return Error::success(); + + if (StreamNum >= Pdb.getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + + auto FS = MappedBlockStream::createIndexedStream(StreamNum, Pdb); + if (!FS) + return FS.takeError(); + + size_t StreamLen = (*FS)->getLength(); + if (StreamLen % sizeof(object::FpoData)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted New FPO stream."); + + size_t NumRecords = StreamLen / sizeof(object::FpoData); + codeview::StreamReader Reader(**FS); + if (auto EC = Reader.readArray(FpoRecords, NumRecords)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted New FPO stream."); + FpoStream = std::move(*FS); + return Error::success(); +} + +Error DbiStream::initializeSectionMapData() { + if (SecMapSubstream.getLength() == 0) + return Error::success(); + + StreamReader SMReader(SecMapSubstream); + const SecMapHeader *Header; + if (auto EC = SMReader.readObject(Header)) + return EC; + if (auto EC = SMReader.readArray(SectionMap, Header->SecCount)) + return EC; + return Error::success(); +} + +Error DbiStream::initializeFileInfo() { + // The layout of the FileInfoSubstream is like this: + // struct { + // ulittle16_t NumModules; + // ulittle16_t NumSourceFiles; + // ulittle16_t ModIndices[NumModules]; + // ulittle16_t ModFileCounts[NumModules]; + // ulittle32_t FileNameOffsets[NumSourceFiles]; + // char Names[][NumSourceFiles]; + // }; + // with the caveat that `NumSourceFiles` cannot be trusted, so + // it is computed by summing `ModFileCounts`. + // + if (FileInfoSubstream.getLength() == 0) + return Error::success(); + + const FileInfoSubstreamHeader *FH; + StreamReader FISR(FileInfoSubstream); + if (auto EC = FISR.readObject(FH)) + return EC; + + // The number of modules in the stream should be the same as reported by + // the FileInfoSubstreamHeader. + if (FH->NumModules != ModuleInfos.size()) + return make_error<RawError>(raw_error_code::corrupt_file, + "FileInfo substream count doesn't match DBI."); + + FixedStreamArray<ulittle16_t> ModIndexArray; + FixedStreamArray<ulittle16_t> ModFileCountArray; + + // First is an array of `NumModules` module indices. This is not used for the + // same reason that `NumSourceFiles` is not used. It's an array of uint16's, + // but it's possible there are more than 64k source files, which would imply + // more than 64k modules (e.g. object files) as well. So we ignore this + // field. + if (auto EC = FISR.readArray(ModIndexArray, ModuleInfos.size())) + return EC; + if (auto EC = FISR.readArray(ModFileCountArray, ModuleInfos.size())) + return EC; + + // Compute the real number of source files. + uint32_t NumSourceFiles = 0; + for (auto Count : ModFileCountArray) + NumSourceFiles += Count; + + // This is the array that in the reference implementation corresponds to + // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a + // pointer. Due to the mentioned problems of pointers causing difficulty + // when reading from the file on 64-bit systems, we continue to ignore that + // field in `ModInfo`, and instead build a vector of StringRefs and stores + // them in `ModuleInfoEx`. The value written to and read from the file is + // not used anyway, it is only there as a way to store the offsets for the + // purposes of later accessing the names at runtime. + if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles)) + return EC; + + if (auto EC = FISR.readStreamRef(NamesBuffer)) + return EC; + + // We go through each ModuleInfo, determine the number N of source files for + // that module, and then get the next N offsets from the Offsets array, using + // them to get the corresponding N names from the Names buffer and associating + // each one with the corresponding module. + uint32_t NextFileIndex = 0; + for (size_t I = 0; I < ModuleInfos.size(); ++I) { + uint32_t NumFiles = ModFileCountArray[I]; + ModuleInfos[I].SourceFiles.resize(NumFiles); + for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) { + auto ThisName = getFileNameForIndex(NextFileIndex); + if (!ThisName) + return ThisName.takeError(); + ModuleInfos[I].SourceFiles[J] = *ThisName; + } + } + + return Error::success(); +} + +uint32_t DbiStream::getDebugStreamIndex(DbgHeaderType Type) const { + return DbgStreams[static_cast<uint16_t>(Type)]; +} + +Expected<StringRef> DbiStream::getFileNameForIndex(uint32_t Index) const { + StreamReader Names(NamesBuffer); + if (Index >= FileNameOffsets.size()) + return make_error<RawError>(raw_error_code::index_out_of_bounds); + + uint32_t FileOffset = FileNameOffsets[Index]; + Names.setOffset(FileOffset); + StringRef Name; + if (auto EC = Names.readZeroString(Name)) + return std::move(EC); + return Name; +} + +Error DbiStream::commit() { + StreamWriter Writer(*Stream); + if (auto EC = Writer.writeObject(*Header)) + return EC; + + return Error::success(); +} diff --git a/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp new file mode 100644 index 000000000000..34ff8ae3a907 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp @@ -0,0 +1,81 @@ +//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h" + +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +DbiStreamBuilder::DbiStreamBuilder() + : Age(1), BuildNumber(0), PdbDllVersion(0), PdbDllRbld(0), Flags(0), + MachineType(PDB_Machine::x86) {} + +void DbiStreamBuilder::setVersionHeader(PdbRaw_DbiVer V) { VerHeader = V; } + +void DbiStreamBuilder::setAge(uint32_t A) { Age = A; } + +void DbiStreamBuilder::setBuildNumber(uint16_t B) { BuildNumber = B; } + +void DbiStreamBuilder::setPdbDllVersion(uint16_t V) { PdbDllVersion = V; } + +void DbiStreamBuilder::setPdbDllRbld(uint16_t R) { PdbDllRbld = R; } + +void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; } + +void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; } + +uint32_t DbiStreamBuilder::calculateSerializedLength() const { + // For now we only support serializing the header. + return sizeof(DbiStream::HeaderInfo); +} + +Expected<std::unique_ptr<DbiStream>> DbiStreamBuilder::build(PDBFile &File) { + if (!VerHeader.hasValue()) + return make_error<RawError>(raw_error_code::unspecified, + "Missing DBI Stream Version"); + + auto DbiS = MappedBlockStream::createIndexedStream(StreamDBI, File); + if (!DbiS) + return DbiS.takeError(); + auto DS = std::move(*DbiS); + DbiStream::HeaderInfo *H = + static_cast<DbiStream::HeaderInfo *>(DS->getAllocator().Allocate( + sizeof(DbiStream::HeaderInfo), + llvm::AlignOf<DbiStream::HeaderInfo>::Alignment)); + H->VersionHeader = *VerHeader; + H->VersionSignature = -1; + H->Age = Age; + H->BuildNumber = BuildNumber; + H->Flags = Flags; + H->PdbDllRbld = PdbDllRbld; + H->PdbDllVersion = PdbDllVersion; + H->MachineType = static_cast<uint16_t>(MachineType); + + H->ECSubstreamSize = 0; + H->FileInfoSize = 0; + H->ModiSubstreamSize = 0; + H->OptionalDbgHdrSize = 0; + H->SecContrSubstreamSize = 0; + H->SectionMapSize = 0; + H->TypeServerSize = 0; + H->SymRecordStreamIndex = DbiStream::InvalidStreamIndex; + H->PublicSymbolStreamIndex = DbiStream::InvalidStreamIndex; + H->MFCTypeServerIndex = DbiStream::InvalidStreamIndex; + H->GlobalSymbolStreamIndex = DbiStream::InvalidStreamIndex; + + auto Dbi = llvm::make_unique<DbiStream>(File, std::move(DS)); + Dbi->Header = H; + return std::move(Dbi); +} diff --git a/lib/DebugInfo/PDB/Raw/EnumTables.cpp b/lib/DebugInfo/PDB/Raw/EnumTables.cpp new file mode 100644 index 000000000000..fc9270c69947 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/EnumTables.cpp @@ -0,0 +1,38 @@ +//===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/EnumTables.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" + +using namespace llvm; +using namespace llvm::pdb; + +#define PDB_ENUM_CLASS_ENT(enum_class, enum) \ + { #enum, std::underlying_type < enum_class > ::type(enum_class::enum) } + +#define PDB_ENUM_ENT(ns, enum) \ + { #enum, ns::enum } + +static const EnumEntry<uint16_t> OMFSegMapDescFlagNames[] = { + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, Read), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, Write), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, Execute), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, AddressIs32Bit), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, IsSelector), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, IsAbsoluteAddress), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, IsGroup), +}; + +namespace llvm { +namespace pdb { +ArrayRef<EnumEntry<uint16_t>> getOMFSegMapDescFlagNames() { + return makeArrayRef(OMFSegMapDescFlagNames); +} +} +}
\ No newline at end of file diff --git a/lib/DebugInfo/PDB/Raw/Hash.cpp b/lib/DebugInfo/PDB/Raw/Hash.cpp new file mode 100644 index 000000000000..23cb55786d78 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/Hash.cpp @@ -0,0 +1,131 @@ +//===- Hash.cpp - PDB Hash Functions --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/Hash.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; + +// Corresponds to `Hasher::lhashPbCb` in PDB/include/misc.h. +// Used for name hash table and TPI/IPI hashes. +uint32_t pdb::hashStringV1(StringRef Str) { + uint32_t Result = 0; + uint32_t Size = Str.size(); + + ArrayRef<ulittle32_t> Longs(reinterpret_cast<const ulittle32_t *>(Str.data()), + Size / 4); + + for (auto Value : Longs) + Result ^= Value; + + const uint8_t *Remainder = reinterpret_cast<const uint8_t *>(Longs.end()); + uint32_t RemainderSize = Size % 4; + + // Maximum of 3 bytes left. Hash a 2 byte word if possible, then hash the + // possibly remaining 1 byte. + if (RemainderSize >= 2) { + uint16_t Value = *reinterpret_cast<const ulittle16_t *>(Remainder); + Result ^= static_cast<uint32_t>(Value); + Remainder += 2; + RemainderSize -= 2; + } + + // hash possible odd byte + if (RemainderSize == 1) { + Result ^= *(Remainder++); + } + + const uint32_t toLowerMask = 0x20202020; + Result |= toLowerMask; + Result ^= (Result >> 11); + + return Result ^ (Result >> 16); +} + +// Corresponds to `HasherV2::HashULONG` in PDB/include/misc.h. +// Used for name hash table. +uint32_t pdb::hashStringV2(StringRef Str) { + uint32_t Hash = 0xb170a1bf; + + ArrayRef<char> Buffer(Str.begin(), Str.end()); + + ArrayRef<ulittle32_t> Items( + reinterpret_cast<const ulittle32_t *>(Buffer.data()), + Buffer.size() / sizeof(ulittle32_t)); + for (ulittle32_t Item : Items) { + Hash += Item; + Hash += (Hash << 10); + Hash ^= (Hash >> 6); + } + Buffer = Buffer.slice(Items.size() * sizeof(ulittle32_t)); + for (uint8_t Item : Buffer) { + Hash += Item; + Hash += (Hash << 10); + Hash ^= (Hash >> 6); + } + + return Hash * 1664525L + 1013904223L; +} + +static const uint32_t V8HashTable[] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, +}; + +// Corresponds to `SigForPbCb` in langapi/shared/crc32.h. +uint32_t pdb::hashBufferV8(ArrayRef<uint8_t> Buf) { + uint32_t Hash = 0; + for (uint8_t Byte : Buf) + Hash = (Hash >> 8) ^ V8HashTable[(Hash & 0xff) ^ Byte]; + return Hash; +} diff --git a/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp b/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp new file mode 100644 index 000000000000..9bd16ea76efc --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp @@ -0,0 +1,25 @@ +//===- IndexedStreamData.cpp - Standard PDB Stream Data ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/IPDBFile.h" + +using namespace llvm; +using namespace llvm::pdb; + +IndexedStreamData::IndexedStreamData(uint32_t StreamIdx, const IPDBFile &File) + : StreamIdx(StreamIdx), File(File) {} + +uint32_t IndexedStreamData::getLength() { + return File.getStreamByteSize(StreamIdx); +} + +ArrayRef<support::ulittle32_t> IndexedStreamData::getStreamBlocks() { + return File.getStreamBlockList(StreamIdx); +} diff --git a/lib/DebugInfo/PDB/Raw/InfoStream.cpp b/lib/DebugInfo/PDB/Raw/InfoStream.cpp new file mode 100644 index 000000000000..c33a764587c7 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/InfoStream.cpp @@ -0,0 +1,90 @@ +//===- InfoStream.cpp - PDB Info Stream (Stream 1) Access -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +InfoStream::InfoStream(std::unique_ptr<MappedBlockStream> Stream) + : Stream(std::move(Stream)) {} + +Error InfoStream::reload() { + codeview::StreamReader Reader(*Stream); + + const HeaderInfo *H; + if (auto EC = Reader.readObject(H)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "PDB Stream does not contain a header.")); + + switch (H->Version) { + case PdbImplVC70: + case PdbImplVC80: + case PdbImplVC110: + case PdbImplVC140: + break; + default: + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported PDB stream version."); + } + + Version = H->Version; + Signature = H->Signature; + Age = H->Age; + Guid = H->Guid; + + return NamedStreams.load(Reader); +} + +uint32_t InfoStream::getNamedStreamIndex(llvm::StringRef Name) const { + uint32_t Result; + if (!NamedStreams.tryGetValue(Name, Result)) + return 0; + return Result; +} + +iterator_range<StringMapConstIterator<uint32_t>> +InfoStream::named_streams() const { + return NamedStreams.entries(); +} + +PdbRaw_ImplVer InfoStream::getVersion() const { + return static_cast<PdbRaw_ImplVer>(Version); +} + +uint32_t InfoStream::getSignature() const { return Signature; } + +uint32_t InfoStream::getAge() const { return Age; } + +PDB_UniqueId InfoStream::getGuid() const { return Guid; } + +Error InfoStream::commit() { + StreamWriter Writer(*Stream); + + HeaderInfo H; + H.Age = Age; + H.Signature = Signature; + H.Version = Version; + H.Guid = Guid; + if (auto EC = Writer.writeObject(H)) + return EC; + + return NamedStreams.commit(Writer); +} diff --git a/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp new file mode 100644 index 000000000000..7be9cc32db96 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp @@ -0,0 +1,67 @@ +//===- InfoStreamBuilder.cpp - PDB Info Stream Creation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h" + +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +InfoStreamBuilder::InfoStreamBuilder() {} + +void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; } + +void InfoStreamBuilder::setSignature(uint32_t S) { Sig = S; } + +void InfoStreamBuilder::setAge(uint32_t A) { Age = A; } + +void InfoStreamBuilder::setGuid(PDB_UniqueId G) { Guid = G; } + +NameMapBuilder &InfoStreamBuilder::getNamedStreamsBuilder() { + return NamedStreams; +} + +uint32_t InfoStreamBuilder::calculateSerializedLength() const { + return sizeof(InfoStream::HeaderInfo) + + NamedStreams.calculateSerializedLength(); +} + +Expected<std::unique_ptr<InfoStream>> InfoStreamBuilder::build(PDBFile &File) { + if (!Ver.hasValue()) + return make_error<RawError>(raw_error_code::unspecified, + "Missing PDB Stream Version"); + if (!Sig.hasValue()) + return make_error<RawError>(raw_error_code::unspecified, + "Missing PDB Stream Signature"); + if (!Age.hasValue()) + return make_error<RawError>(raw_error_code::unspecified, + "Missing PDB Stream Age"); + if (!Guid.hasValue()) + return make_error<RawError>(raw_error_code::unspecified, + "Missing PDB Stream Guid"); + + auto InfoS = MappedBlockStream::createIndexedStream(StreamPDB, File); + if (!InfoS) + return InfoS.takeError(); + auto Info = llvm::make_unique<InfoStream>(std::move(*InfoS)); + Info->Version = *Ver; + Info->Signature = *Sig; + Info->Age = *Age; + Info->Guid = *Guid; + auto NS = NamedStreams.build(); + if (!NS) + return NS.takeError(); + Info->NamedStreams = **NS; + return std::move(Info); +} diff --git a/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp b/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp new file mode 100644 index 000000000000..92b2048c3c22 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp @@ -0,0 +1,310 @@ +//===- MappedBlockStream.cpp - Reads stream data from a PDBFile -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/IPDBStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +// This exists so that we can use make_unique while still keeping the +// constructor of MappedBlockStream private, forcing users to go through +// the `create` interface. +class MappedBlockStreamImpl : public MappedBlockStream { +public: + MappedBlockStreamImpl(std::unique_ptr<IPDBStreamData> Data, + const IPDBFile &File) + : MappedBlockStream(std::move(Data), File) {} +}; +} + +typedef std::pair<uint32_t, uint32_t> Interval; +static Interval intersect(const Interval &I1, const Interval &I2) { + return std::make_pair(std::max(I1.first, I2.first), + std::min(I1.second, I2.second)); +} + +MappedBlockStream::MappedBlockStream(std::unique_ptr<IPDBStreamData> Data, + const IPDBFile &Pdb) + : Pdb(Pdb), Data(std::move(Data)) {} + +Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, + ArrayRef<uint8_t> &Buffer) const { + // Make sure we aren't trying to read beyond the end of the stream. + if (Size > Data->getLength()) + return make_error<RawError>(raw_error_code::insufficient_buffer); + if (Offset > Data->getLength() - Size) + return make_error<RawError>(raw_error_code::insufficient_buffer); + + if (tryReadContiguously(Offset, Size, Buffer)) + return Error::success(); + + auto CacheIter = CacheMap.find(Offset); + if (CacheIter != CacheMap.end()) { + // Try to find an alloc that was large enough for this request. + for (auto &Entry : CacheIter->second) { + if (Entry.size() >= Size) { + Buffer = Entry.slice(0, Size); + return Error::success(); + } + } + } + + // We couldn't find a buffer that started at the correct offset (the most + // common scenario). Try to see if there is a buffer that starts at some + // other offset but overlaps the desired range. + for (auto &CacheItem : CacheMap) { + Interval RequestExtent = std::make_pair(Offset, Offset + Size); + + // We already checked this one on the fast path above. + if (CacheItem.first == Offset) + continue; + // If the initial extent of the cached item is beyond the ending extent + // of the request, there is no overlap. + if (CacheItem.first >= Offset + Size) + continue; + + // We really only have to check the last item in the list, since we append + // in order of increasing length. + if (CacheItem.second.empty()) + continue; + + auto CachedAlloc = CacheItem.second.back(); + // If the initial extent of the request is beyond the ending extent of + // the cached item, there is no overlap. + Interval CachedExtent = + std::make_pair(CacheItem.first, CacheItem.first + CachedAlloc.size()); + if (RequestExtent.first >= CachedExtent.first + CachedExtent.second) + continue; + + Interval Intersection = intersect(CachedExtent, RequestExtent); + // Only use this if the entire request extent is contained in the cached + // extent. + if (Intersection != RequestExtent) + continue; + + uint32_t CacheRangeOffset = + AbsoluteDifference(CachedExtent.first, Intersection.first); + Buffer = CachedAlloc.slice(CacheRangeOffset, Size); + return Error::success(); + } + + // Otherwise allocate a large enough buffer in the pool, memcpy the data + // into it, and return an ArrayRef to that. Do not touch existing pool + // allocations, as existing clients may be holding a pointer which must + // not be invalidated. + uint8_t *WriteBuffer = static_cast<uint8_t *>(Pool.Allocate(Size, 8)); + if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size))) + return EC; + + if (CacheIter != CacheMap.end()) { + CacheIter->second.emplace_back(WriteBuffer, Size); + } else { + std::vector<CacheEntry> List; + List.emplace_back(WriteBuffer, Size); + CacheMap.insert(std::make_pair(Offset, List)); + } + Buffer = ArrayRef<uint8_t>(WriteBuffer, Size); + return Error::success(); +} + +Error MappedBlockStream::readLongestContiguousChunk( + uint32_t Offset, ArrayRef<uint8_t> &Buffer) const { + // Make sure we aren't trying to read beyond the end of the stream. + if (Offset >= Data->getLength()) + return make_error<RawError>(raw_error_code::insufficient_buffer); + uint32_t First = Offset / Pdb.getBlockSize(); + uint32_t Last = First; + + auto BlockList = Data->getStreamBlocks(); + while (Last < Pdb.getBlockCount() - 1) { + if (BlockList[Last] != BlockList[Last + 1] - 1) + break; + ++Last; + } + + uint32_t OffsetInFirstBlock = Offset % Pdb.getBlockSize(); + uint32_t BytesFromFirstBlock = Pdb.getBlockSize() - OffsetInFirstBlock; + uint32_t BlockSpan = Last - First + 1; + uint32_t ByteSpan = + BytesFromFirstBlock + (BlockSpan - 1) * Pdb.getBlockSize(); + auto Result = Pdb.getBlockData(BlockList[First], Pdb.getBlockSize()); + if (!Result) + return Result.takeError(); + Buffer = Result->drop_front(OffsetInFirstBlock); + Buffer = ArrayRef<uint8_t>(Buffer.data(), ByteSpan); + return Error::success(); +} + +uint32_t MappedBlockStream::getLength() const { return Data->getLength(); } + +Error MappedBlockStream::commit() const { return Error::success(); } + +bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size, + ArrayRef<uint8_t> &Buffer) const { + // Attempt to fulfill the request with a reference directly into the stream. + // This can work even if the request crosses a block boundary, provided that + // all subsequent blocks are contiguous. For example, a 10k read with a 4k + // block size can be filled with a reference if, from the starting offset, + // 3 blocks in a row are contiguous. + uint32_t BlockNum = Offset / Pdb.getBlockSize(); + uint32_t OffsetInBlock = Offset % Pdb.getBlockSize(); + uint32_t BytesFromFirstBlock = + std::min(Size, Pdb.getBlockSize() - OffsetInBlock); + uint32_t NumAdditionalBlocks = + llvm::alignTo(Size - BytesFromFirstBlock, Pdb.getBlockSize()) / + Pdb.getBlockSize(); + + auto BlockList = Data->getStreamBlocks(); + uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1; + uint32_t E = BlockList[BlockNum]; + for (uint32_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) { + if (BlockList[I + BlockNum] != E) + return false; + } + + uint32_t FirstBlockAddr = BlockList[BlockNum]; + auto Result = Pdb.getBlockData(FirstBlockAddr, Pdb.getBlockSize()); + if (!Result) { + consumeError(Result.takeError()); + return false; + } + auto Data = Result->drop_front(OffsetInBlock); + Buffer = ArrayRef<uint8_t>(Data.data(), Size); + return true; +} + +Error MappedBlockStream::readBytes(uint32_t Offset, + MutableArrayRef<uint8_t> Buffer) const { + uint32_t BlockNum = Offset / Pdb.getBlockSize(); + uint32_t OffsetInBlock = Offset % Pdb.getBlockSize(); + + // Make sure we aren't trying to read beyond the end of the stream. + if (Buffer.size() > Data->getLength()) + return make_error<RawError>(raw_error_code::insufficient_buffer); + if (Offset > Data->getLength() - Buffer.size()) + return make_error<RawError>(raw_error_code::insufficient_buffer); + + uint32_t BytesLeft = Buffer.size(); + uint32_t BytesWritten = 0; + uint8_t *WriteBuffer = Buffer.data(); + auto BlockList = Data->getStreamBlocks(); + while (BytesLeft > 0) { + uint32_t StreamBlockAddr = BlockList[BlockNum]; + + auto Result = Pdb.getBlockData(StreamBlockAddr, Pdb.getBlockSize()); + if (!Result) + return Result.takeError(); + + auto Data = *Result; + const uint8_t *ChunkStart = Data.data() + OffsetInBlock; + uint32_t BytesInChunk = + std::min(BytesLeft, Pdb.getBlockSize() - OffsetInBlock); + ::memcpy(WriteBuffer + BytesWritten, ChunkStart, BytesInChunk); + + BytesWritten += BytesInChunk; + BytesLeft -= BytesInChunk; + ++BlockNum; + OffsetInBlock = 0; + } + + return Error::success(); +} + +Error MappedBlockStream::writeBytes(uint32_t Offset, + ArrayRef<uint8_t> Buffer) const { + // Make sure we aren't trying to write beyond the end of the stream. + if (Buffer.size() > Data->getLength()) + return make_error<RawError>(raw_error_code::insufficient_buffer); + + if (Offset > Data->getLength() - Buffer.size()) + return make_error<RawError>(raw_error_code::insufficient_buffer); + + uint32_t BlockNum = Offset / Pdb.getBlockSize(); + uint32_t OffsetInBlock = Offset % Pdb.getBlockSize(); + + uint32_t BytesLeft = Buffer.size(); + auto BlockList = Data->getStreamBlocks(); + uint32_t BytesWritten = 0; + while (BytesLeft > 0) { + uint32_t StreamBlockAddr = BlockList[BlockNum]; + uint32_t BytesToWriteInChunk = + std::min(BytesLeft, Pdb.getBlockSize() - OffsetInBlock); + + const uint8_t *Chunk = Buffer.data() + BytesWritten; + ArrayRef<uint8_t> ChunkData(Chunk, BytesToWriteInChunk); + if (auto EC = Pdb.setBlockData(StreamBlockAddr, OffsetInBlock, ChunkData)) + return EC; + + BytesLeft -= BytesToWriteInChunk; + BytesWritten += BytesToWriteInChunk; + ++BlockNum; + OffsetInBlock = 0; + } + + // If this write overlapped a read which previously came from the pool, + // someone may still be holding a pointer to that alloc which is now invalid. + // Compute the overlapping range and update the cache entry, so any + // outstanding buffers are automatically updated. + for (const auto &MapEntry : CacheMap) { + // If the end of the written extent precedes the beginning of the cached + // extent, ignore this map entry. + if (Offset + BytesWritten < MapEntry.first) + continue; + for (const auto &Alloc : MapEntry.second) { + // If the end of the cached extent precedes the beginning of the written + // extent, ignore this alloc. + if (MapEntry.first + Alloc.size() < Offset) + continue; + + // If we get here, they are guaranteed to overlap. + Interval WriteInterval = std::make_pair(Offset, Offset + BytesWritten); + Interval CachedInterval = + std::make_pair(MapEntry.first, MapEntry.first + Alloc.size()); + // If they overlap, we need to write the new data into the overlapping + // range. + auto Intersection = intersect(WriteInterval, CachedInterval); + assert(Intersection.first <= Intersection.second); + + uint32_t Length = Intersection.second - Intersection.first; + uint32_t SrcOffset = + AbsoluteDifference(WriteInterval.first, Intersection.first); + uint32_t DestOffset = + AbsoluteDifference(CachedInterval.first, Intersection.first); + ::memcpy(Alloc.data() + DestOffset, Buffer.data() + SrcOffset, Length); + } + } + + return Error::success(); +} + +uint32_t MappedBlockStream::getNumBytesCopied() const { + return static_cast<uint32_t>(Pool.getBytesAllocated()); +} + +Expected<std::unique_ptr<MappedBlockStream>> +MappedBlockStream::createIndexedStream(uint32_t StreamIdx, + const IPDBFile &File) { + if (StreamIdx >= File.getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + + auto Data = llvm::make_unique<IndexedStreamData>(StreamIdx, File); + return llvm::make_unique<MappedBlockStreamImpl>(std::move(Data), File); +} + +Expected<std::unique_ptr<MappedBlockStream>> +MappedBlockStream::createDirectoryStream(const PDBFile &File) { + auto Data = llvm::make_unique<DirectoryStreamData>(File); + return llvm::make_unique<MappedBlockStreamImpl>(std::move(Data), File); +} diff --git a/lib/DebugInfo/PDB/Raw/ModInfo.cpp b/lib/DebugInfo/PDB/Raw/ModInfo.cpp new file mode 100644 index 000000000000..bae135f77bc0 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/ModInfo.cpp @@ -0,0 +1,127 @@ +//===- ModInfo.cpp - PDB module information -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/ModInfo.h" + +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::pdb; +using namespace llvm::support; + +namespace { + +struct SCBytes { + ulittle16_t Section; + char Padding1[2]; + little32_t Offset; + little32_t Size; + ulittle32_t Characteristics; + ulittle16_t ModuleIndex; + char Padding2[2]; + ulittle32_t DataCrc; + ulittle32_t RelocCrc; +}; + +// struct Flags { +// uint16_t fWritten : 1; // True if ModInfo is dirty +// uint16_t fECEnabled : 1; // Is EC symbolic info present? (What is EC?) +// uint16_t unused : 6; // Reserved +// uint16_t iTSM : 8; // Type Server Index for this module +//}; +const uint16_t HasECFlagMask = 0x2; + +const uint16_t TypeServerIndexMask = 0xFF00; +const uint16_t TypeServerIndexShift = 8; +} + +struct ModInfo::FileLayout { + ulittle32_t Mod; // Currently opened module. This field is a + // pointer in the reference implementation, but + // that won't work on 64-bit systems, and anyway + // it doesn't make sense to read a pointer from a + // file. For now it is unused, so just ignore it. + SCBytes SC; // First section contribution of this module. + ulittle16_t Flags; // See Flags definition. + ulittle16_t ModDiStream; // Stream Number of module debug info + ulittle32_t SymBytes; // Size of local symbol debug info in above stream + ulittle32_t LineBytes; // Size of line number debug info in above stream + ulittle32_t C13Bytes; // Size of C13 line number info in above stream + ulittle16_t NumFiles; // Number of files contributing to this module + char Padding1[2]; // Padding so the next field is 4-byte aligned. + ulittle32_t FileNameOffs; // array of [0..NumFiles) DBI name buffer offsets. + // This field is a pointer in the reference + // implementation, but as with `Mod`, we ignore it + // for now since it is unused. + ulittle32_t SrcFileNameNI; // Name Index for src file name + ulittle32_t PdbFilePathNI; // Name Index for path to compiler PDB + // Null terminated Module name + // Null terminated Obj File Name +}; + +ModInfo::ModInfo() : Layout(nullptr) {} + +ModInfo::ModInfo(const ModInfo &Info) + : ModuleName(Info.ModuleName), ObjFileName(Info.ObjFileName), + Layout(Info.Layout) {} + +ModInfo::~ModInfo() {} + +Error ModInfo::initialize(codeview::StreamRef Stream, ModInfo &Info) { + codeview::StreamReader Reader(Stream); + if (auto EC = Reader.readObject(Info.Layout)) + return EC; + + if (auto EC = Reader.readZeroString(Info.ModuleName)) + return EC; + + if (auto EC = Reader.readZeroString(Info.ObjFileName)) + return EC; + return Error::success(); +} + +bool ModInfo::hasECInfo() const { return (Layout->Flags & HasECFlagMask) != 0; } + +uint16_t ModInfo::getTypeServerIndex() const { + return (Layout->Flags & TypeServerIndexMask) >> TypeServerIndexShift; +} + +uint16_t ModInfo::getModuleStreamIndex() const { return Layout->ModDiStream; } + +uint32_t ModInfo::getSymbolDebugInfoByteSize() const { + return Layout->SymBytes; +} + +uint32_t ModInfo::getLineInfoByteSize() const { return Layout->LineBytes; } + +uint32_t ModInfo::getC13LineInfoByteSize() const { return Layout->C13Bytes; } + +uint32_t ModInfo::getNumberOfFiles() const { return Layout->NumFiles; } + +uint32_t ModInfo::getSourceFileNameIndex() const { + return Layout->SrcFileNameNI; +} + +uint32_t ModInfo::getPdbFilePathNameIndex() const { + return Layout->PdbFilePathNI; +} + +StringRef ModInfo::getModuleName() const { return ModuleName; } + +StringRef ModInfo::getObjFileName() const { return ObjFileName; } + +uint32_t ModInfo::getRecordLength() const { + uint32_t M = ModuleName.str().size() + 1; + uint32_t O = ObjFileName.str().size() + 1; + uint32_t Size = sizeof(FileLayout) + M + O; + Size = llvm::alignTo(Size, 4); + return Size; +} diff --git a/lib/DebugInfo/PDB/Raw/ModStream.cpp b/lib/DebugInfo/PDB/Raw/ModStream.cpp new file mode 100644 index 000000000000..3415fcd47790 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/ModStream.cpp @@ -0,0 +1,82 @@ +//===- ModStream.cpp - PDB Module Info Stream Access ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/ModStream.h" + +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/ModInfo.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" + +using namespace llvm; +using namespace llvm::pdb; + +ModStream::ModStream(const ModInfo &Module, + std::unique_ptr<MappedBlockStream> Stream) + : Mod(Module), Stream(std::move(Stream)) {} + +ModStream::~ModStream() {} + +Error ModStream::reload() { + codeview::StreamReader Reader(*Stream); + + uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize(); + uint32_t C11Size = Mod.getLineInfoByteSize(); + uint32_t C13Size = Mod.getC13LineInfoByteSize(); + + if (C11Size > 0 && C13Size > 0) + return llvm::make_error<RawError>(raw_error_code::corrupt_file, + "Module has both C11 and C13 line info"); + + codeview::StreamRef S; + + uint32_t SymbolSubstreamSig = 0; + if (auto EC = Reader.readInteger(SymbolSubstreamSig)) + return EC; + if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4)) + return EC; + + if (auto EC = Reader.readStreamRef(LinesSubstream, C11Size)) + return EC; + if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) + return EC; + + codeview::StreamReader LineReader(C13LinesSubstream); + if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining())) + return EC; + + uint32_t GlobalRefsSize; + if (auto EC = Reader.readInteger(GlobalRefsSize)) + return EC; + if (auto EC = Reader.readStreamRef(GlobalRefsSubstream, GlobalRefsSize)) + return EC; + if (Reader.bytesRemaining() > 0) + return llvm::make_error<RawError>(raw_error_code::corrupt_file, + "Unexpected bytes in module stream."); + + return Error::success(); +} + +iterator_range<codeview::CVSymbolArray::Iterator> +ModStream::symbols(bool *HadError) const { + // It's OK if the stream is empty. + if (SymbolsSubstream.getUnderlyingStream().getLength() == 0) + return llvm::make_range(SymbolsSubstream.end(), SymbolsSubstream.end()); + return llvm::make_range(SymbolsSubstream.begin(HadError), + SymbolsSubstream.end()); +} + +iterator_range<codeview::ModuleSubstreamArray::Iterator> +ModStream::lines(bool *HadError) const { + return llvm::make_range(LineInfo.begin(HadError), LineInfo.end()); +} + +Error ModStream::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Raw/MsfBuilder.cpp b/lib/DebugInfo/PDB/Raw/MsfBuilder.cpp new file mode 100644 index 000000000000..16b086bdaa98 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/MsfBuilder.cpp @@ -0,0 +1,279 @@ +//===- MSFBuilder.cpp - MSF Directory & Metadata Builder --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/MsfBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::pdb; +using namespace llvm::pdb::msf; +using namespace llvm::support; + +namespace { +const uint32_t kSuperBlockBlock = 0; +const uint32_t kFreePageMap0Block = 1; +const uint32_t kFreePageMap1Block = 2; +const uint32_t kNumReservedPages = 3; + +const uint32_t kDefaultBlockMapAddr = kNumReservedPages; +} + +MsfBuilder::MsfBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow, + BumpPtrAllocator &Allocator) + : Allocator(Allocator), IsGrowable(CanGrow), BlockSize(BlockSize), + MininumBlocks(MinBlockCount), BlockMapAddr(kDefaultBlockMapAddr), + FreeBlocks(MinBlockCount, true) { + FreeBlocks[kSuperBlockBlock] = false; + FreeBlocks[kFreePageMap0Block] = false; + FreeBlocks[kFreePageMap1Block] = false; + FreeBlocks[BlockMapAddr] = false; +} + +Expected<MsfBuilder> MsfBuilder::create(BumpPtrAllocator &Allocator, + uint32_t BlockSize, + uint32_t MinBlockCount, bool CanGrow) { + if (!msf::isValidBlockSize(BlockSize)) + return make_error<RawError>(raw_error_code::unspecified, + "The requested block size is unsupported"); + + return MsfBuilder(BlockSize, + std::max(MinBlockCount, msf::getMinimumBlockCount()), + CanGrow, Allocator); +} + +Error MsfBuilder::setBlockMapAddr(uint32_t Addr) { + if (Addr == BlockMapAddr) + return Error::success(); + + if (Addr >= FreeBlocks.size()) { + if (!IsGrowable) + return make_error<RawError>(raw_error_code::unspecified, + "Cannot grow the number of blocks"); + FreeBlocks.resize(Addr + 1); + } + + if (!isBlockFree(Addr)) + return make_error<RawError>(raw_error_code::unspecified, + "Attempt to reuse an allocated block"); + FreeBlocks[BlockMapAddr] = true; + FreeBlocks[Addr] = false; + BlockMapAddr = Addr; + return Error::success(); +} + +void MsfBuilder::setFreePageMap(uint32_t Fpm) { FreePageMap = Fpm; } + +void MsfBuilder::setUnknown1(uint32_t Unk1) { Unknown1 = Unk1; } + +Error MsfBuilder::setDirectoryBlocksHint(ArrayRef<uint32_t> DirBlocks) { + for (auto B : DirectoryBlocks) + FreeBlocks[B] = true; + for (auto B : DirBlocks) { + if (!isBlockFree(B)) { + return make_error<RawError>(raw_error_code::unspecified, + "Attempt to reuse an allocated block"); + } + FreeBlocks[B] = false; + } + + DirectoryBlocks = DirBlocks; + return Error::success(); +} + +Error MsfBuilder::allocateBlocks(uint32_t NumBlocks, + MutableArrayRef<uint32_t> Blocks) { + if (NumBlocks == 0) + return Error::success(); + + uint32_t NumFreeBlocks = FreeBlocks.count(); + if (NumFreeBlocks < NumBlocks) { + if (!IsGrowable) + return make_error<RawError>(raw_error_code::unspecified, + "There are no free Blocks in the file"); + uint32_t AllocBlocks = NumBlocks - NumFreeBlocks; + FreeBlocks.resize(AllocBlocks + FreeBlocks.size(), true); + } + + int I = 0; + int Block = FreeBlocks.find_first(); + do { + assert(Block != -1 && "We ran out of Blocks!"); + + uint32_t NextBlock = static_cast<uint32_t>(Block); + Blocks[I++] = NextBlock; + FreeBlocks.reset(NextBlock); + Block = FreeBlocks.find_next(Block); + } while (--NumBlocks > 0); + return Error::success(); +} + +uint32_t MsfBuilder::getNumUsedBlocks() const { + return getTotalBlockCount() - getNumFreeBlocks(); +} + +uint32_t MsfBuilder::getNumFreeBlocks() const { return FreeBlocks.count(); } + +uint32_t MsfBuilder::getTotalBlockCount() const { return FreeBlocks.size(); } + +bool MsfBuilder::isBlockFree(uint32_t Idx) const { return FreeBlocks[Idx]; } + +Error MsfBuilder::addStream(uint32_t Size, ArrayRef<uint32_t> Blocks) { + // Add a new stream mapped to the specified blocks. Verify that the specified + // blocks are both necessary and sufficient for holding the requested number + // of bytes, and verify that all requested blocks are free. + uint32_t ReqBlocks = bytesToBlocks(Size, BlockSize); + if (ReqBlocks != Blocks.size()) + return make_error<RawError>( + raw_error_code::unspecified, + "Incorrect number of blocks for requested stream size"); + for (auto Block : Blocks) { + if (Block >= FreeBlocks.size()) + FreeBlocks.resize(Block + 1, true); + + if (!FreeBlocks.test(Block)) + return make_error<RawError>( + raw_error_code::unspecified, + "Attempt to re-use an already allocated block"); + } + // Mark all the blocks occupied by the new stream as not free. + for (auto Block : Blocks) { + FreeBlocks.reset(Block); + } + StreamData.push_back(std::make_pair(Size, Blocks)); + return Error::success(); +} + +Error MsfBuilder::addStream(uint32_t Size) { + uint32_t ReqBlocks = bytesToBlocks(Size, BlockSize); + std::vector<uint32_t> NewBlocks; + NewBlocks.resize(ReqBlocks); + if (auto EC = allocateBlocks(ReqBlocks, NewBlocks)) + return EC; + StreamData.push_back(std::make_pair(Size, NewBlocks)); + return Error::success(); +} + +Error MsfBuilder::setStreamSize(uint32_t Idx, uint32_t Size) { + uint32_t OldSize = getStreamSize(Idx); + if (OldSize == Size) + return Error::success(); + + uint32_t NewBlocks = bytesToBlocks(Size, BlockSize); + uint32_t OldBlocks = bytesToBlocks(OldSize, BlockSize); + + if (NewBlocks > OldBlocks) { + uint32_t AddedBlocks = NewBlocks - OldBlocks; + // If we're growing, we have to allocate new Blocks. + std::vector<uint32_t> AddedBlockList; + AddedBlockList.resize(AddedBlocks); + if (auto EC = allocateBlocks(AddedBlocks, AddedBlockList)) + return EC; + auto &CurrentBlocks = StreamData[Idx].second; + CurrentBlocks.insert(CurrentBlocks.end(), AddedBlockList.begin(), + AddedBlockList.end()); + } else if (OldBlocks > NewBlocks) { + // For shrinking, free all the Blocks in the Block map, update the stream + // data, then shrink the directory. + uint32_t RemovedBlocks = OldBlocks - NewBlocks; + auto CurrentBlocks = ArrayRef<uint32_t>(StreamData[Idx].second); + auto RemovedBlockList = CurrentBlocks.drop_front(NewBlocks); + for (auto P : RemovedBlockList) + FreeBlocks[P] = true; + StreamData[Idx].second = CurrentBlocks.drop_back(RemovedBlocks); + } + + StreamData[Idx].first = Size; + return Error::success(); +} + +uint32_t MsfBuilder::getNumStreams() const { return StreamData.size(); } + +uint32_t MsfBuilder::getStreamSize(uint32_t StreamIdx) const { + return StreamData[StreamIdx].first; +} + +ArrayRef<uint32_t> MsfBuilder::getStreamBlocks(uint32_t StreamIdx) const { + return StreamData[StreamIdx].second; +} + +uint32_t MsfBuilder::computeDirectoryByteSize() const { + // The directory has the following layout, where each item is a ulittle32_t: + // NumStreams + // StreamSizes[NumStreams] + // StreamBlocks[NumStreams][] + uint32_t Size = sizeof(ulittle32_t); // NumStreams + Size += StreamData.size() * sizeof(ulittle32_t); // StreamSizes + for (const auto &D : StreamData) { + uint32_t ExpectedNumBlocks = bytesToBlocks(D.first, BlockSize); + assert(ExpectedNumBlocks == D.second.size() && + "Unexpected number of blocks"); + Size += ExpectedNumBlocks * sizeof(ulittle32_t); + } + return Size; +} + +Expected<Layout> MsfBuilder::build() { + Layout L; + L.SB = Allocator.Allocate<SuperBlock>(); + std::memcpy(L.SB->MagicBytes, Magic, sizeof(Magic)); + L.SB->BlockMapAddr = BlockMapAddr; + L.SB->BlockSize = BlockSize; + L.SB->NumDirectoryBytes = computeDirectoryByteSize(); + L.SB->FreeBlockMapBlock = FreePageMap; + L.SB->Unknown1 = Unknown1; + + uint32_t NumDirectoryBlocks = + bytesToBlocks(L.SB->NumDirectoryBytes, BlockSize); + if (NumDirectoryBlocks > DirectoryBlocks.size()) { + // Our hint wasn't enough to satisfy the entire directory. Allocate + // remaining pages. + std::vector<uint32_t> ExtraBlocks; + uint32_t NumExtraBlocks = NumDirectoryBlocks - DirectoryBlocks.size(); + ExtraBlocks.resize(NumExtraBlocks); + if (auto EC = allocateBlocks(NumExtraBlocks, ExtraBlocks)) + return std::move(EC); + DirectoryBlocks.insert(DirectoryBlocks.end(), ExtraBlocks.begin(), + ExtraBlocks.end()); + } else if (NumDirectoryBlocks < DirectoryBlocks.size()) { + uint32_t NumUnnecessaryBlocks = DirectoryBlocks.size() - NumDirectoryBlocks; + for (auto B : + ArrayRef<uint32_t>(DirectoryBlocks).drop_back(NumUnnecessaryBlocks)) + FreeBlocks[B] = true; + DirectoryBlocks.resize(NumDirectoryBlocks); + } + + // Don't set the number of blocks in the file until after allocating Blocks + // for + // the directory, since the allocation might cause the file to need to grow. + L.SB->NumBlocks = FreeBlocks.size(); + + ulittle32_t *DirBlocks = Allocator.Allocate<ulittle32_t>(NumDirectoryBlocks); + std::uninitialized_copy_n(DirectoryBlocks.begin(), NumDirectoryBlocks, + DirBlocks); + L.DirectoryBlocks = ArrayRef<ulittle32_t>(DirBlocks, NumDirectoryBlocks); + + // The stream sizes should be re-allocated as a stable pointer and the stream + // map should have each of its entries allocated as a separate stable pointer. + if (StreamData.size() > 0) { + ulittle32_t *Sizes = Allocator.Allocate<ulittle32_t>(StreamData.size()); + L.StreamSizes = ArrayRef<ulittle32_t>(Sizes, StreamData.size()); + L.StreamMap.resize(StreamData.size()); + for (uint32_t I = 0; I < StreamData.size(); ++I) { + Sizes[I] = StreamData[I].first; + ulittle32_t *BlockList = + Allocator.Allocate<ulittle32_t>(StreamData[I].second.size()); + std::uninitialized_copy_n(StreamData[I].second.begin(), + StreamData[I].second.size(), BlockList); + L.StreamMap[I] = + ArrayRef<ulittle32_t>(BlockList, StreamData[I].second.size()); + } + } + + return L; +} diff --git a/lib/DebugInfo/PDB/Raw/MsfCommon.cpp b/lib/DebugInfo/PDB/Raw/MsfCommon.cpp new file mode 100644 index 000000000000..5d97f33e1103 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/MsfCommon.cpp @@ -0,0 +1,48 @@ +//===- MsfCommon.cpp - Common types and functions for MSF files -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/MsfCommon.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::pdb::msf; + +Error llvm::pdb::msf::validateSuperBlock(const SuperBlock &SB) { + // Check the magic bytes. + if (std::memcmp(SB.MagicBytes, Magic, sizeof(Magic)) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "MSF magic header doesn't match"); + + if (!isValidBlockSize(SB.BlockSize)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported block size."); + + // We don't support directories whose sizes aren't a multiple of four bytes. + if (SB.NumDirectoryBytes % sizeof(support::ulittle32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Directory size is not multiple of 4."); + + // The number of blocks which comprise the directory is a simple function of + // the number of bytes it contains. + uint64_t NumDirectoryBlocks = + bytesToBlocks(SB.NumDirectoryBytes, SB.BlockSize); + + // The directory, as we understand it, is a block which consists of a list of + // block numbers. It is unclear what would happen if the number of blocks + // couldn't fit on a single block. + if (NumDirectoryBlocks > SB.BlockSize / sizeof(support::ulittle32_t)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Too many directory blocks."); + + if (SB.BlockMapAddr == 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Block 0 is reserved"); + + return Error::success(); +} diff --git a/lib/DebugInfo/PDB/Raw/NameHashTable.cpp b/lib/DebugInfo/PDB/Raw/NameHashTable.cpp new file mode 100644 index 000000000000..ae4ebf27721e --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/NameHashTable.cpp @@ -0,0 +1,104 @@ +//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/PDB/Raw/Hash.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {} + +Error NameHashTable::load(codeview::StreamReader &Stream) { + struct Header { + support::ulittle32_t Signature; + support::ulittle32_t HashVersion; + support::ulittle32_t ByteSize; + }; + + const Header *H; + if (auto EC = Stream.readObject(H)) + return EC; + + if (H->Signature != 0xEFFEEFFE) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table signature"); + if (H->HashVersion != 1 && H->HashVersion != 2) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported hash version"); + + Signature = H->Signature; + HashVersion = H->HashVersion; + if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table byte length")); + + const support::ulittle32_t *HashCount; + if (auto EC = Stream.readObject(HashCount)) + return EC; + + if (auto EC = Stream.readArray(IDs, *HashCount)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read bucket array")); + + if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Missing name count"); + + if (auto EC = Stream.readInteger(NameCount)) + return EC; + return Error::success(); +} + +StringRef NameHashTable::getStringForID(uint32_t ID) const { + if (ID == IDs[0]) + return StringRef(); + + // NamesBuffer is a buffer of null terminated strings back to back. ID is + // the starting offset of the string we're looking for. So just seek into + // the desired offset and a read a null terminated stream from that offset. + StringRef Result; + codeview::StreamReader NameReader(NamesBuffer); + NameReader.setOffset(ID); + if (auto EC = NameReader.readZeroString(Result)) + consumeError(std::move(EC)); + return Result; +} + +uint32_t NameHashTable::getIDForString(StringRef Str) const { + uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); + size_t Count = IDs.size(); + uint32_t Start = Hash % Count; + for (size_t I = 0; I < Count; ++I) { + // The hash is just a starting point for the search, but if it + // doesn't work we should find the string no matter what, because + // we iterate the entire array. + uint32_t Index = (Start + I) % Count; + + uint32_t ID = IDs[Index]; + StringRef S = getStringForID(ID); + if (S == Str) + return ID; + } + // IDs[0] contains the ID of the "invalid" entry. + return IDs[0]; +} + +codeview::FixedStreamArray<support::ulittle32_t> +NameHashTable::name_ids() const { + return IDs; +} diff --git a/lib/DebugInfo/PDB/Raw/NameMap.cpp b/lib/DebugInfo/PDB/Raw/NameMap.cpp new file mode 100644 index 000000000000..b8a4eb79a48c --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/NameMap.cpp @@ -0,0 +1,213 @@ +//===- NameMap.cpp - PDB Name Map -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/NameMap.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NameMap::NameMap() {} + +Error NameMap::load(codeview::StreamReader &Stream) { + + // This is some sort of weird string-set/hash table encoded in the stream. + // It starts with the number of bytes in the table. + uint32_t NumberOfBytes; + if (auto EC = Stream.readInteger(NumberOfBytes)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map length")); + if (Stream.bytesRemaining() < NumberOfBytes) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid name map length"); + + // Following that field is the starting offset of strings in the name table. + uint32_t StringsOffset = Stream.getOffset(); + Stream.setOffset(StringsOffset + NumberOfBytes); + + // This appears to be equivalent to the total number of strings *actually* + // in the name table. + uint32_t HashSize; + if (auto EC = Stream.readInteger(HashSize)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map hash size")); + + // This appears to be an upper bound on the number of strings in the name + // table. + uint32_t MaxNumberOfStrings; + if (auto EC = Stream.readInteger(MaxNumberOfStrings)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map max strings")); + + if (MaxNumberOfStrings > (UINT32_MAX / sizeof(uint32_t))) + return make_error<RawError>(raw_error_code::corrupt_file, + "Implausible number of strings"); + + const uint32_t MaxNumberOfWords = UINT32_MAX / (sizeof(uint32_t) * 8); + + // This appears to be a hash table which uses bitfields to determine whether + // or not a bucket is 'present'. + uint32_t NumPresentWords; + if (auto EC = Stream.readInteger(NumPresentWords)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map num words")); + + if (NumPresentWords > MaxNumberOfWords) + return make_error<RawError>(raw_error_code::corrupt_file, + "Number of present words is too large"); + + SparseBitVector<> Present; + for (uint32_t I = 0; I != NumPresentWords; ++I) { + uint32_t Word; + if (auto EC = Stream.readInteger(Word)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map word")); + for (unsigned Idx = 0; Idx < 32; ++Idx) + if (Word & (1U << Idx)) + Present.set((I * 32) + Idx); + } + + // This appears to be a hash table which uses bitfields to determine whether + // or not a bucket is 'deleted'. + uint32_t NumDeletedWords; + if (auto EC = Stream.readInteger(NumDeletedWords)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map num deleted words")); + + if (NumDeletedWords > MaxNumberOfWords) + return make_error<RawError>(raw_error_code::corrupt_file, + "Number of deleted words is too large"); + + SparseBitVector<> Deleted; + for (uint32_t I = 0; I != NumDeletedWords; ++I) { + uint32_t Word; + if (auto EC = Stream.readInteger(Word)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map word")); + for (unsigned Idx = 0; Idx < 32; ++Idx) + if (Word & (1U << Idx)) + Deleted.set((I * 32) + Idx); + } + + for (unsigned I : Present) { + // For all present entries, dump out their mapping. + (void)I; + + // This appears to be an offset relative to the start of the strings. + // It tells us where the null-terminated string begins. + uint32_t NameOffset; + if (auto EC = Stream.readInteger(NameOffset)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map name offset")); + + // This appears to be a stream number into the stream directory. + uint32_t NameIndex; + if (auto EC = Stream.readInteger(NameIndex)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map name index")); + + // Compute the offset of the start of the string relative to the stream. + uint32_t StringOffset = StringsOffset + NameOffset; + uint32_t OldOffset = Stream.getOffset(); + // Pump out our c-string from the stream. + StringRef Str; + Stream.setOffset(StringOffset); + if (auto EC = Stream.readZeroString(Str)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map name")); + + Stream.setOffset(OldOffset); + // Add this to a string-map from name to stream number. + Mapping.insert({Str, NameIndex}); + } + + return Error::success(); +} + +Error NameMap::commit(codeview::StreamWriter &Writer) { + // The first field is the number of bytes of string data. So add + // up the length of all strings plus a null terminator for each + // one. + uint32_t NumBytes = 0; + for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) { + NumBytes += B->getKeyLength() + 1; + } + + if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data + return EC; + // Now all of the string data itself. + for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) { + if (auto EC = Writer.writeZeroString(B->getKey())) + return EC; + } + + if (auto EC = Writer.writeInteger(Mapping.size())) // Hash Size + return EC; + + if (auto EC = Writer.writeInteger(Mapping.size())) // Max Number of Strings + return EC; + + if (auto EC = Writer.writeInteger(Mapping.size())) // Num Present Words + return EC; + + // For each entry in the mapping, write a bit mask which represents a bucket + // to store it in. We don't use this, so the value we write isn't important + // to us, it just has to be there. + for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) { + if (auto EC = Writer.writeInteger(1U)) + return EC; + } + + if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words + return EC; + + // Mappings of each word. + uint32_t OffsetSoFar = 0; + for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) { + // This is a list of key value pairs where the key is the offset into the + // strings buffer, and the value is a stream number. Write each pair. + if (auto EC = Writer.writeInteger(OffsetSoFar)) + return EC; + + if (auto EC = Writer.writeInteger(B->second)) + return EC; + + OffsetSoFar += B->getKeyLength() + 1; + } + + return Error::success(); +} + +iterator_range<StringMapConstIterator<uint32_t>> NameMap::entries() const { + return llvm::make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(), + Mapping.end()); +} + +bool NameMap::tryGetValue(StringRef Name, uint32_t &Value) const { + auto Iter = Mapping.find(Name); + if (Iter == Mapping.end()) + return false; + Value = Iter->second; + return true; +} diff --git a/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp b/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp new file mode 100644 index 000000000000..41c6c2cd810a --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp @@ -0,0 +1,50 @@ +//===- NameMapBuilder.cpp - PDB Name Map Builder ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/NameMapBuilder.h" + +#include "llvm/DebugInfo/PDB/Raw/NameMap.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::pdb; + +NameMapBuilder::NameMapBuilder() {} + +void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) { + StringDataBytes += Name.size() + 1; + Map.insert({Name, Mapping}); +} + +Expected<std::unique_ptr<NameMap>> NameMapBuilder::build() { + auto Result = llvm::make_unique<NameMap>(); + Result->Mapping = Map; + return std::move(Result); +} + +uint32_t NameMapBuilder::calculateSerializedLength() const { + uint32_t TotalLength = 0; + + TotalLength += sizeof(support::ulittle32_t); // StringDataBytes value + TotalLength += StringDataBytes; // actual string data + + TotalLength += sizeof(support::ulittle32_t); // Hash Size + TotalLength += sizeof(support::ulittle32_t); // Max Number of Strings + TotalLength += sizeof(support::ulittle32_t); // Num Present Words + // One bitmask word for each present entry + TotalLength += Map.size() * sizeof(support::ulittle32_t); + TotalLength += sizeof(support::ulittle32_t); // Num Deleted Words + + // For each present word, which we are treating as equivalent to the number of + // entries in the table, we have a pair of integers. An offset into the + // string data, and a corresponding stream number. + TotalLength += Map.size() * 2 * sizeof(support::ulittle32_t); + + return TotalLength; +} diff --git a/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/lib/DebugInfo/PDB/Raw/PDBFile.cpp new file mode 100644 index 000000000000..95016753dc14 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/PDBFile.cpp @@ -0,0 +1,365 @@ +//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/StreamArray.h" +#include "llvm/DebugInfo/CodeView/StreamInterface.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" +#include "llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h" +#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +namespace { +typedef FixedStreamArray<support::ulittle32_t> ulittle_array; +} + +PDBFile::PDBFile(std::unique_ptr<StreamInterface> PdbFileBuffer) + : Buffer(std::move(PdbFileBuffer)), SB(nullptr) {} + +PDBFile::~PDBFile() {} + +uint32_t PDBFile::getBlockSize() const { return SB->BlockSize; } + +uint32_t PDBFile::getFreeBlockMapBlock() const { return SB->FreeBlockMapBlock; } + +uint32_t PDBFile::getBlockCount() const { return SB->NumBlocks; } + +uint32_t PDBFile::getNumDirectoryBytes() const { return SB->NumDirectoryBytes; } + +uint32_t PDBFile::getBlockMapIndex() const { return SB->BlockMapAddr; } + +uint32_t PDBFile::getUnknown1() const { return SB->Unknown1; } + +uint32_t PDBFile::getNumDirectoryBlocks() const { + return msf::bytesToBlocks(SB->NumDirectoryBytes, SB->BlockSize); +} + +uint64_t PDBFile::getBlockMapOffset() const { + return (uint64_t)SB->BlockMapAddr * SB->BlockSize; +} + +uint32_t PDBFile::getNumStreams() const { return StreamSizes.size(); } + +uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { + return StreamSizes[StreamIndex]; +} + +ArrayRef<support::ulittle32_t> +PDBFile::getStreamBlockList(uint32_t StreamIndex) const { + return StreamMap[StreamIndex]; +} + +uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); } + +Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, + uint32_t NumBytes) const { + uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); + + ArrayRef<uint8_t> Result; + if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) + return std::move(EC); + return Result; +} + +Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, + ArrayRef<uint8_t> Data) const { + if (Offset >= getBlockSize()) + return make_error<RawError>( + raw_error_code::invalid_block_address, + "setBlockData attempted to write out of block bounds."); + if (Data.size() > getBlockSize() - Offset) + return make_error<RawError>( + raw_error_code::invalid_block_address, + "setBlockData attempted to write out of block bounds."); + + uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); + StreamBlockOffset += Offset; + return Buffer->writeBytes(StreamBlockOffset, Data); +} + +Error PDBFile::parseFileHeaders() { + StreamReader Reader(*Buffer); + + if (auto EC = Reader.readObject(SB)) { + consumeError(std::move(EC)); + return make_error<RawError>(raw_error_code::corrupt_file, + "Does not contain superblock"); + } + + if (auto EC = setSuperBlock(SB)) + return EC; + + Reader.setOffset(getBlockMapOffset()); + if (auto EC = Reader.readArray(DirectoryBlocks, getNumDirectoryBlocks())) + return EC; + + return Error::success(); +} + +Error PDBFile::parseStreamData() { + assert(SB); + if (DirectoryStream) + return Error::success(); + + uint32_t NumStreams = 0; + + // Normally you can't use a MappedBlockStream without having fully parsed the + // PDB file, because it accesses the directory and various other things, which + // is exactly what we are attempting to parse. By specifying a custom + // subclass of IPDBStreamData which only accesses the fields that have already + // been parsed, we can avoid this and reuse MappedBlockStream. + auto DS = MappedBlockStream::createDirectoryStream(*this); + if (!DS) + return DS.takeError(); + StreamReader Reader(**DS); + if (auto EC = Reader.readInteger(NumStreams)) + return EC; + + if (auto EC = Reader.readArray(StreamSizes, NumStreams)) + return EC; + for (uint32_t I = 0; I < NumStreams; ++I) { + uint32_t StreamSize = getStreamByteSize(I); + // FIXME: What does StreamSize ~0U mean? + uint64_t NumExpectedStreamBlocks = + StreamSize == UINT32_MAX ? 0 : msf::bytesToBlocks(StreamSize, + SB->BlockSize); + + // For convenience, we store the block array contiguously. This is because + // if someone calls setStreamMap(), it is more convenient to be able to call + // it with an ArrayRef instead of setting up a StreamRef. Since the + // DirectoryStream is cached in the class and thus lives for the life of the + // class, we can be guaranteed that readArray() will return a stable + // reference, even if it has to allocate from its internal pool. + ArrayRef<support::ulittle32_t> Blocks; + if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) + return EC; + for (uint32_t Block : Blocks) { + uint64_t BlockEndOffset = (uint64_t)(Block + 1) * SB->BlockSize; + if (BlockEndOffset > getFileSize()) + return make_error<RawError>(raw_error_code::corrupt_file, + "Stream block map is corrupt."); + } + StreamMap.push_back(Blocks); + } + + // We should have read exactly SB->NumDirectoryBytes bytes. + assert(Reader.bytesRemaining() == 0); + DirectoryStream = std::move(*DS); + return Error::success(); +} + +llvm::ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { + return DirectoryBlocks; +} + +Expected<InfoStream &> PDBFile::getPDBInfoStream() { + if (!Info) { + auto InfoS = MappedBlockStream::createIndexedStream(StreamPDB, *this); + if (!InfoS) + return InfoS.takeError(); + auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS)); + if (auto EC = TempInfo->reload()) + return std::move(EC); + Info = std::move(TempInfo); + } + return *Info; +} + +Expected<DbiStream &> PDBFile::getPDBDbiStream() { + if (!Dbi) { + auto DbiS = MappedBlockStream::createIndexedStream(StreamDBI, *this); + if (!DbiS) + return DbiS.takeError(); + auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS)); + if (auto EC = TempDbi->reload()) + return std::move(EC); + Dbi = std::move(TempDbi); + } + return *Dbi; +} + +Expected<TpiStream &> PDBFile::getPDBTpiStream() { + if (!Tpi) { + auto TpiS = MappedBlockStream::createIndexedStream(StreamTPI, *this); + if (!TpiS) + return TpiS.takeError(); + auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS)); + if (auto EC = TempTpi->reload()) + return std::move(EC); + Tpi = std::move(TempTpi); + } + return *Tpi; +} + +Expected<TpiStream &> PDBFile::getPDBIpiStream() { + if (!Ipi) { + auto IpiS = MappedBlockStream::createIndexedStream(StreamIPI, *this); + if (!IpiS) + return IpiS.takeError(); + auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS)); + if (auto EC = TempIpi->reload()) + return std::move(EC); + Ipi = std::move(TempIpi); + } + return *Ipi; +} + +Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { + if (!Publics) { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return DbiS.takeError(); + + uint32_t PublicsStreamNum = DbiS->getPublicSymbolStreamIndex(); + + auto PublicS = + MappedBlockStream::createIndexedStream(PublicsStreamNum, *this); + if (!PublicS) + return PublicS.takeError(); + auto TempPublics = + llvm::make_unique<PublicsStream>(*this, std::move(*PublicS)); + if (auto EC = TempPublics->reload()) + return std::move(EC); + Publics = std::move(TempPublics); + } + return *Publics; +} + +Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { + if (!Symbols) { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return DbiS.takeError(); + + uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); + + auto SymbolS = + MappedBlockStream::createIndexedStream(SymbolStreamNum, *this); + if (!SymbolS) + return SymbolS.takeError(); + auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS)); + if (auto EC = TempSymbols->reload()) + return std::move(EC); + Symbols = std::move(TempSymbols); + } + return *Symbols; +} + +Expected<NameHashTable &> PDBFile::getStringTable() { + if (!StringTable || !StringTableStream) { + auto IS = getPDBInfoStream(); + if (!IS) + return IS.takeError(); + + uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names"); + + if (NameStreamIndex == 0) + return make_error<RawError>(raw_error_code::no_stream); + if (NameStreamIndex >= getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + + auto NS = MappedBlockStream::createIndexedStream(NameStreamIndex, *this); + if (!NS) + return NS.takeError(); + + StreamReader Reader(**NS); + auto N = llvm::make_unique<NameHashTable>(); + if (auto EC = N->load(Reader)) + return std::move(EC); + StringTable = std::move(N); + StringTableStream = std::move(*NS); + } + return *StringTable; +} + +Error PDBFile::setSuperBlock(const msf::SuperBlock *Block) { + if (auto EC = msf::validateSuperBlock(*Block)) + return EC; + + if (Buffer->getLength() % SB->BlockSize != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "File size is not a multiple of block size"); + + SB = Block; + return Error::success(); +} + +Error PDBFile::commit() { + StreamWriter Writer(*Buffer); + + if (auto EC = Writer.writeObject(*SB)) + return EC; + Writer.setOffset(getBlockMapOffset()); + if (auto EC = Writer.writeArray(DirectoryBlocks)) + return EC; + + auto DS = MappedBlockStream::createDirectoryStream(*this); + if (!DS) + return DS.takeError(); + auto DirStream = std::move(*DS); + StreamWriter DW(*DirStream); + if (auto EC = DW.writeInteger(this->getNumStreams())) + return EC; + + if (auto EC = DW.writeArray(StreamSizes)) + return EC; + + for (const auto &Blocks : StreamMap) { + if (auto EC = DW.writeArray(Blocks)) + return EC; + } + + if (Info) { + if (auto EC = Info->commit()) + return EC; + } + + if (Dbi) { + if (auto EC = Dbi->commit()) + return EC; + } + + if (Symbols) { + if (auto EC = Symbols->commit()) + return EC; + } + + if (Publics) { + if (auto EC = Publics->commit()) + return EC; + } + + if (Tpi) { + if (auto EC = Tpi->commit()) + return EC; + } + + if (Ipi) { + if (auto EC = Ipi->commit()) + return EC; + } + + return Buffer->commit(); +} diff --git a/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp new file mode 100644 index 000000000000..9063fd62d295 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp @@ -0,0 +1,102 @@ +//===- PDBFileBuilder.cpp - PDB File Creation -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h" + +#include "llvm/ADT/BitVector.h" + +#include "llvm/DebugInfo/CodeView/StreamInterface.h" +#include "llvm/DebugInfo/CodeView/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStream.h" +#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; +using namespace llvm::support; + +PDBFileBuilder::PDBFileBuilder( + std::unique_ptr<codeview::StreamInterface> FileBuffer) + : File(llvm::make_unique<PDBFile>(std::move(FileBuffer))) {} + +Error PDBFileBuilder::initialize(const msf::SuperBlock &Super) { + auto ExpectedMsf = + MsfBuilder::create(File->Allocator, Super.BlockSize, Super.NumBlocks); + if (!ExpectedMsf) + return ExpectedMsf.takeError(); + + auto &MsfResult = *ExpectedMsf; + if (auto EC = MsfResult.setBlockMapAddr(Super.BlockMapAddr)) + return EC; + Msf = llvm::make_unique<MsfBuilder>(std::move(MsfResult)); + Msf->setFreePageMap(Super.FreeBlockMapBlock); + Msf->setUnknown1(Super.Unknown1); + return Error::success(); +} + +MsfBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; } + +InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() { + if (!Info) + Info = llvm::make_unique<InfoStreamBuilder>(); + return *Info; +} + +DbiStreamBuilder &PDBFileBuilder::getDbiBuilder() { + if (!Dbi) + Dbi = llvm::make_unique<DbiStreamBuilder>(); + return *Dbi; +} + +Expected<std::unique_ptr<PDBFile>> PDBFileBuilder::build() { + if (Info) { + uint32_t Length = Info->calculateSerializedLength(); + if (auto EC = Msf->setStreamSize(StreamPDB, Length)) + return std::move(EC); + } + if (Dbi) { + uint32_t Length = Dbi->calculateSerializedLength(); + if (auto EC = Msf->setStreamSize(StreamDBI, Length)) + return std::move(EC); + } + + auto ExpectedLayout = Msf->build(); + if (!ExpectedLayout) + return ExpectedLayout.takeError(); + + const msf::Layout &L = *ExpectedLayout; + File->StreamMap = L.StreamMap; + File->StreamSizes = L.StreamSizes; + File->DirectoryBlocks = L.DirectoryBlocks; + File->SB = L.SB; + + if (Info) { + auto ExpectedInfo = Info->build(*File); + if (!ExpectedInfo) + return ExpectedInfo.takeError(); + File->Info = std::move(*ExpectedInfo); + } + + if (Dbi) { + auto ExpectedDbi = Dbi->build(*File); + if (!ExpectedDbi) + return ExpectedDbi.takeError(); + File->Dbi = std::move(*ExpectedDbi); + } + + if (File->Info && File->Dbi && File->Info->getAge() != File->Dbi->getAge()) + return llvm::make_error<RawError>( + raw_error_code::corrupt_file, + "PDB Stream Age doesn't match Dbi Stream Age!"); + + return std::move(File); +} diff --git a/lib/DebugInfo/PDB/Raw/PublicsStream.cpp b/lib/DebugInfo/PDB/Raw/PublicsStream.cpp new file mode 100644 index 000000000000..af3d2d026b48 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/PublicsStream.cpp @@ -0,0 +1,173 @@ +//===- PublicsStream.cpp - PDB Public Symbol Stream -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The data structures defined in this file are based on the reference +// implementation which is available at +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +// +// When you are reading the reference source code, you'd find the +// information below useful. +// +// - ppdb1->m_fMinimalDbgInfo seems to be always true. +// - SMALLBUCKETS macro is defined. +// +// The reference doesn't compile, so I learned just by reading code. +// It's not guaranteed to be correct. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h" + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + + +static const unsigned IPHR_HASH = 4096; + +// This is PSGSIHDR struct defined in +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +struct PublicsStream::HeaderInfo { + ulittle32_t SymHash; + ulittle32_t AddrMap; + ulittle32_t NumThunks; + ulittle32_t SizeOfThunk; + ulittle16_t ISectThunkTable; + char Padding[2]; + ulittle32_t OffThunkTable; + ulittle32_t NumSections; +}; + +// This is GSIHashHdr. +struct PublicsStream::GSIHashHeader { + enum : unsigned { + HdrSignature = ~0U, + HdrVersion = 0xeffe0000 + 19990810, + }; + ulittle32_t VerSignature; + ulittle32_t VerHdr; + ulittle32_t HrSize; + ulittle32_t NumBuckets; +}; + +PublicsStream::PublicsStream(PDBFile &File, + std::unique_ptr<MappedBlockStream> Stream) + : Pdb(File), Stream(std::move(Stream)) {} + +PublicsStream::~PublicsStream() {} + +uint32_t PublicsStream::getSymHash() const { return Header->SymHash; } +uint32_t PublicsStream::getAddrMap() const { return Header->AddrMap; } + +// Publics stream contains fixed-size headers and a serialized hash table. +// This implementation is not complete yet. It reads till the end of the +// stream so that we verify the stream is at least not corrupted. However, +// we skip over the hash table which we believe contains information about +// public symbols. +Error PublicsStream::reload() { + codeview::StreamReader Reader(*Stream); + + // Check stream size. + if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Publics Stream does not contain a header."); + + // Read PSGSIHDR and GSIHashHdr structs. + if (Reader.readObject(Header)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Publics Stream does not contain a header."); + + if (Reader.readObject(HashHdr)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Publics Stream does not contain a header."); + + // An array of HashRecord follows. Read them. + if (HashHdr->HrSize % sizeof(PSHashRecord)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid HR array size."); + uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord); + if (auto EC = Reader.readArray(HashRecords, NumHashRecords)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read an HR array")); + + // A bitmap of a fixed length follows. + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + if (auto EC = Reader.readBytes(Bitmap, NumBitmapEntries)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a bitmap.")); + for (uint8_t B : Bitmap) + NumBuckets += countPopulation(B); + + // We don't yet understand the following data structures completely, + // but we at least know the types and sizes. Here we are trying + // to read the stream till end so that we at least can detect + // corrupted streams. + + // Hash buckets follow. + if (auto EC = Reader.readArray(HashBuckets, NumBuckets)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Hash buckets corrupted.")); + + // Something called "address map" follows. + uint32_t NumAddressMapEntries = Header->AddrMap / sizeof(uint32_t); + if (auto EC = Reader.readArray(AddressMap, NumAddressMapEntries)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read an address map.")); + + // Something called "thunk map" follows. + if (auto EC = Reader.readArray(ThunkMap, Header->NumThunks)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a thunk map.")); + + // Something called "section map" follows. + if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a section map.")); + + if (Reader.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted publics stream."); + return Error::success(); +} + +iterator_range<codeview::CVSymbolArray::Iterator> +PublicsStream::getSymbols(bool *HadError) const { + auto SymbolS = Pdb.getPDBSymbolStream(); + if (SymbolS.takeError()) { + codeview::CVSymbolArray::Iterator Iter; + return llvm::make_range(Iter, Iter); + } + SymbolStream &SS = SymbolS.get(); + + return SS.getSymbols(HadError); +} + +Error PublicsStream::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Raw/RawError.cpp b/lib/DebugInfo/PDB/Raw/RawError.cpp new file mode 100644 index 000000000000..eb169f70e11c --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/RawError.cpp @@ -0,0 +1,67 @@ +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. +class RawErrorCategory : public std::error_category { +public: + const char *name() const LLVM_NOEXCEPT override { return "llvm.pdb.raw"; } + + std::string message(int Condition) const override { + switch (static_cast<raw_error_code>(Condition)) { + case raw_error_code::unspecified: + return "An unknown error has occurred."; + case raw_error_code::feature_unsupported: + return "The feature is unsupported by the implementation."; + case raw_error_code::corrupt_file: + return "The PDB file is corrupt."; + case raw_error_code::insufficient_buffer: + return "The buffer is not large enough to read the requested number of " + "bytes."; + case raw_error_code::no_stream: + return "The specified stream could not be loaded."; + case raw_error_code::index_out_of_bounds: + return "The specified item does not exist in the array."; + case raw_error_code::invalid_block_address: + return "The specified block address is not valid."; + case raw_error_code::not_writable: + return "The PDB does not support writing."; + case raw_error_code::invalid_tpi_hash: + return "The Type record has an invalid hash value."; + } + llvm_unreachable("Unrecognized raw_error_code"); + } +}; +} // end anonymous namespace + +static ManagedStatic<RawErrorCategory> Category; + +char RawError::ID = 0; + +RawError::RawError(raw_error_code C) : RawError(C, "") {} + +RawError::RawError(const std::string &Context) + : RawError(raw_error_code::unspecified, Context) {} + +RawError::RawError(raw_error_code C, const std::string &Context) : Code(C) { + ErrMsg = "Native PDB Error: "; + std::error_code EC = convertToErrorCode(); + if (Code != raw_error_code::unspecified) + ErrMsg += EC.message() + " "; + if (!Context.empty()) + ErrMsg += Context; +} + +void RawError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; } + +const std::string &RawError::getErrorMessage() const { return ErrMsg; } + +std::error_code RawError::convertToErrorCode() const { + return std::error_code(static_cast<int>(Code), *Category); +} diff --git a/lib/DebugInfo/PDB/Raw/RawSession.cpp b/lib/DebugInfo/PDB/Raw/RawSession.cpp new file mode 100644 index 000000000000..455d33140dd4 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/RawSession.cpp @@ -0,0 +1,146 @@ +//===- RawSession.cpp - Raw implementation of IPDBSession -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/RawSession.h" + +#include "llvm/DebugInfo/CodeView/ByteStream.h" +#include "llvm/DebugInfo/CodeView/StreamInterface.h" +#include "llvm/DebugInfo/PDB/GenericError.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" +#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +// We need a class which behaves like an immutable ByteStream, but whose data +// is backed by an llvm::MemoryBuffer. It also needs to own the underlying +// MemoryBuffer, so this simple adapter is a good way to achieve that. +class InputByteStream : public codeview::ByteStream<false> { +public: + explicit InputByteStream(std::unique_ptr<MemoryBuffer> Buffer) + : ByteStream(ArrayRef<uint8_t>(Buffer->getBuffer().bytes_begin(), + Buffer->getBuffer().bytes_end())), + MemBuffer(std::move(Buffer)) {} + + std::unique_ptr<MemoryBuffer> MemBuffer; +}; +} + +RawSession::RawSession(std::unique_ptr<PDBFile> PdbFile) + : Pdb(std::move(PdbFile)) {} + +RawSession::~RawSession() {} + +Error RawSession::createFromPdb(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = + MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + if (!ErrorOrBuffer) + return llvm::make_error<GenericError>(generic_error_code::invalid_path); + + std::unique_ptr<MemoryBuffer> Buffer = std::move(*ErrorOrBuffer); + auto Stream = llvm::make_unique<InputByteStream>(std::move(Buffer)); + + std::unique_ptr<PDBFile> File(new PDBFile(std::move(Stream))); + if (auto EC = File->parseFileHeaders()) + return EC; + if (auto EC = File->parseStreamData()) + return EC; + + Session.reset(new RawSession(std::move(File))); + + return Error::success(); +} + +Error RawSession::createFromExe(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + return llvm::make_error<RawError>(raw_error_code::feature_unsupported); +} + +uint64_t RawSession::getLoadAddress() const { return 0; } + +void RawSession::setLoadAddress(uint64_t Address) {} + +std::unique_ptr<PDBSymbolExe> RawSession::getGlobalScope() const { + return nullptr; +} + +std::unique_ptr<PDBSymbol> RawSession::getSymbolById(uint32_t SymbolId) const { + return nullptr; +} + +std::unique_ptr<PDBSymbol> +RawSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumLineNumbers> +RawSession::findLineNumbers(const PDBSymbolCompiland &Compiland, + const IPDBSourceFile &File) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumLineNumbers> +RawSession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> +RawSession::findSourceFiles(const PDBSymbolCompiland *Compiland, + llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBSourceFile> +RawSession::findOneSourceFile(const PDBSymbolCompiland *Compiland, + llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>> +RawSession::findCompilandsForSourceFile(llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<PDBSymbolCompiland> +RawSession::findOneCompilandForSourceFile(llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> RawSession::getAllSourceFiles() const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> RawSession::getSourceFilesForCompiland( + const PDBSymbolCompiland &Compiland) const { + return nullptr; +} + +std::unique_ptr<IPDBSourceFile> +RawSession::getSourceFileById(uint32_t FileId) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumDataStreams> RawSession::getDebugStreams() const { + return nullptr; +} diff --git a/lib/DebugInfo/PDB/Raw/SymbolStream.cpp b/lib/DebugInfo/PDB/Raw/SymbolStream.cpp new file mode 100644 index 000000000000..41b2a64bfb14 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/SymbolStream.cpp @@ -0,0 +1,46 @@ +//===- SymbolStream.cpp - PDB Symbol Stream Access ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h" + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +SymbolStream::SymbolStream(std::unique_ptr<MappedBlockStream> Stream) + : Stream(std::move(Stream)) {} + +SymbolStream::~SymbolStream() {} + +Error SymbolStream::reload() { + codeview::StreamReader Reader(*Stream); + + if (auto EC = Reader.readArray(SymbolRecords, Stream->getLength())) + return EC; + + return Error::success(); +} + +iterator_range<codeview::CVSymbolArray::Iterator> +SymbolStream::getSymbols(bool *HadError) const { + return llvm::make_range(SymbolRecords.begin(HadError), SymbolRecords.end()); +} + +Error SymbolStream::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/lib/DebugInfo/PDB/Raw/TpiStream.cpp new file mode 100644 index 000000000000..5617e57ccf67 --- /dev/null +++ b/lib/DebugInfo/PDB/Raw/TpiStream.cpp @@ -0,0 +1,273 @@ +//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" + +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/StreamReader.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/Raw/Hash.h" +#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" + +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::support; +using namespace llvm::pdb; + +namespace { +const uint32_t MinHashBuckets = 0x1000; +const uint32_t MaxHashBuckets = 0x40000; +} + +// This corresponds to `HDR` in PDB/dbi/tpi.h. +struct TpiStream::HeaderInfo { + struct EmbeddedBuf { + little32_t Off; + ulittle32_t Length; + }; + + ulittle32_t Version; + ulittle32_t HeaderSize; + ulittle32_t TypeIndexBegin; + ulittle32_t TypeIndexEnd; + ulittle32_t TypeRecordBytes; + + // The following members correspond to `TpiHash` in PDB/dbi/tpi.h. + ulittle16_t HashStreamIndex; + ulittle16_t HashAuxStreamIndex; + ulittle32_t HashKeySize; + ulittle32_t NumHashBuckets; + + EmbeddedBuf HashValueBuffer; + EmbeddedBuf IndexOffsetBuffer; + EmbeddedBuf HashAdjBuffer; +}; + +TpiStream::TpiStream(const PDBFile &File, + std::unique_ptr<MappedBlockStream> Stream) + : Pdb(File), Stream(std::move(Stream)) {} + +TpiStream::~TpiStream() {} + +// Corresponds to `fUDTAnon`. +template <typename T> static bool isAnonymous(T &Rec) { + StringRef Name = Rec.getName(); + return Name == "<unnamed-tag>" || Name == "__unnamed" || + Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed"); +} + +// Computes a hash for a given TPI record. +template <typename T> +static uint32_t getTpiHash(T &Rec, const CVRecord<TypeLeafKind> &RawRec) { + auto Opts = static_cast<uint16_t>(Rec.getOptions()); + + bool ForwardRef = + Opts & static_cast<uint16_t>(ClassOptions::ForwardReference); + bool Scoped = Opts & static_cast<uint16_t>(ClassOptions::Scoped); + bool UniqueName = Opts & static_cast<uint16_t>(ClassOptions::HasUniqueName); + bool IsAnon = UniqueName && isAnonymous(Rec); + + if (!ForwardRef && !Scoped && !IsAnon) + return hashStringV1(Rec.getName()); + if (!ForwardRef && UniqueName && !IsAnon) + return hashStringV1(Rec.getUniqueName()); + return hashBufferV8(RawRec.RawData); +} + +namespace { +class TpiHashVerifier : public TypeVisitorCallbacks { +public: + TpiHashVerifier(FixedStreamArray<support::ulittle32_t> &HashValues, + uint32_t NumHashBuckets) + : HashValues(HashValues), NumHashBuckets(NumHashBuckets) {} + + Error visitUdtSourceLine(UdtSourceLineRecord &Rec) override { + return verifySourceLine(Rec); + } + + Error visitUdtModSourceLine(UdtModSourceLineRecord &Rec) override { + return verifySourceLine(Rec); + } + + Error visitClass(ClassRecord &Rec) override { return verify(Rec); } + Error visitEnum(EnumRecord &Rec) override { return verify(Rec); } + Error visitUnion(UnionRecord &Rec) override { return verify(Rec); } + + Error visitTypeBegin(const CVRecord<TypeLeafKind> &Rec) override { + ++Index; + RawRecord = &Rec; + return Error::success(); + } + +private: + template <typename T> Error verify(T &Rec) { + uint32_t Hash = getTpiHash(Rec, *RawRecord); + if (Hash % NumHashBuckets != HashValues[Index]) + return errorInvalidHash(); + return Error::success(); + } + + template <typename T> Error verifySourceLine(T &Rec) { + char Buf[4]; + support::endian::write32le(Buf, Rec.getUDT().getIndex()); + uint32_t Hash = hashStringV1(StringRef(Buf, 4)); + if (Hash % NumHashBuckets != HashValues[Index]) + return errorInvalidHash(); + return Error::success(); + } + + Error errorInvalidHash() { + return make_error<RawError>( + raw_error_code::invalid_tpi_hash, + "Type index is 0x" + utohexstr(TypeIndex::FirstNonSimpleIndex + Index)); + } + + FixedStreamArray<support::ulittle32_t> HashValues; + const CVRecord<TypeLeafKind> *RawRecord; + uint32_t NumHashBuckets; + uint32_t Index = -1; +}; +} + +// Verifies that a given type record matches with a given hash value. +// Currently we only verify SRC_LINE records. +Error TpiStream::verifyHashValues() { + TpiHashVerifier Verifier(HashValues, Header->NumHashBuckets); + CVTypeVisitor Visitor(Verifier); + return Visitor.visitTypeStream(TypeRecords); +} + +Error TpiStream::reload() { + StreamReader Reader(*Stream); + + if (Reader.bytesRemaining() < sizeof(HeaderInfo)) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream does not contain a header."); + + if (Reader.readObject(Header)) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream does not contain a header."); + + if (Header->Version != PdbTpiV80) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported TPI Version."); + + if (Header->HeaderSize != sizeof(HeaderInfo)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupt TPI Header size."); + + if (Header->HashKeySize != sizeof(ulittle32_t)) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream expected 4 byte hash key size."); + + if (Header->NumHashBuckets < MinHashBuckets || + Header->NumHashBuckets > MaxHashBuckets) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream Invalid number of hash buckets."); + + // The actual type records themselves come from this stream + if (auto EC = Reader.readArray(TypeRecords, Header->TypeRecordBytes)) + return EC; + + // Hash indices, hash values, etc come from the hash stream. + if (Header->HashStreamIndex >= Pdb.getNumStreams()) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid TPI hash stream index."); + + auto HS = + MappedBlockStream::createIndexedStream(Header->HashStreamIndex, Pdb); + if (!HS) + return HS.takeError(); + StreamReader HSR(**HS); + + uint32_t NumHashValues = Header->HashValueBuffer.Length / sizeof(ulittle32_t); + if (NumHashValues != NumTypeRecords()) + return make_error<RawError>( + raw_error_code::corrupt_file, + "TPI hash count does not match with the number of type records."); + HSR.setOffset(Header->HashValueBuffer.Off); + if (auto EC = HSR.readArray(HashValues, NumHashValues)) + return EC; + + HSR.setOffset(Header->IndexOffsetBuffer.Off); + uint32_t NumTypeIndexOffsets = + Header->IndexOffsetBuffer.Length / sizeof(TypeIndexOffset); + if (auto EC = HSR.readArray(TypeIndexOffsets, NumTypeIndexOffsets)) + return EC; + + HSR.setOffset(Header->HashAdjBuffer.Off); + uint32_t NumHashAdjustments = + Header->HashAdjBuffer.Length / sizeof(TypeIndexOffset); + if (auto EC = HSR.readArray(HashAdjustments, NumHashAdjustments)) + return EC; + + HashStream = std::move(*HS); + + // TPI hash table is a parallel array for the type records. + // Verify that the hash values match with type records. + if (auto EC = verifyHashValues()) + return EC; + + return Error::success(); +} + +PdbRaw_TpiVer TpiStream::getTpiVersion() const { + uint32_t Value = Header->Version; + return static_cast<PdbRaw_TpiVer>(Value); +} + +uint32_t TpiStream::TypeIndexBegin() const { return Header->TypeIndexBegin; } + +uint32_t TpiStream::TypeIndexEnd() const { return Header->TypeIndexEnd; } + +uint32_t TpiStream::NumTypeRecords() const { + return TypeIndexEnd() - TypeIndexBegin(); +} + +uint16_t TpiStream::getTypeHashStreamIndex() const { + return Header->HashStreamIndex; +} + +uint16_t TpiStream::getTypeHashStreamAuxIndex() const { + return Header->HashAuxStreamIndex; +} + +uint32_t TpiStream::NumHashBuckets() const { return Header->NumHashBuckets; } +uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; } + +FixedStreamArray<support::ulittle32_t> +TpiStream::getHashValues() const { + return HashValues; +} + +FixedStreamArray<TypeIndexOffset> +TpiStream::getTypeIndexOffsets() const { + return TypeIndexOffsets; +} + +FixedStreamArray<TypeIndexOffset> +TpiStream::getHashAdjustments() const { + return HashAdjustments; +} + +iterator_range<CVTypeArray::Iterator> +TpiStream::types(bool *HadError) const { + return llvm::make_range(TypeRecords.begin(HadError), TypeRecords.end()); +} + +Error TpiStream::commit() { return Error::success(); } |