diff options
Diffstat (limited to 'contrib/llvm/lib/ProfileData')
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMapping.cpp | 18 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp | 101 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProf.cpp | 434 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfIndexed.h | 56 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfReader.cpp | 452 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfWriter.cpp | 175 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProf.cpp | 106 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProfReader.cpp | 742 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProfWriter.cpp | 184 |
9 files changed, 1735 insertions, 533 deletions
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp index cf04fea8491d..55c0fb4792ef 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp @@ -181,18 +181,6 @@ void FunctionRecordIterator::skipOtherFiles() { *this = FunctionRecordIterator(); } -/// Get the function name from the record, removing the filename prefix if -/// necessary. -static StringRef getFuncNameWithoutPrefix(const CoverageMappingRecord &Record) { - StringRef FunctionName = Record.FunctionName; - if (Record.Filenames.empty()) - return FunctionName; - StringRef Filename = sys::path::filename(Record.Filenames[0]); - if (FunctionName.startswith(Filename)) - FunctionName = FunctionName.drop_front(Filename.size() + 1); - return FunctionName; -} - ErrorOr<std::unique_ptr<CoverageMapping>> CoverageMapping::load(CoverageMappingReader &CoverageReader, IndexedInstrProfReader &ProfileReader) { @@ -216,7 +204,11 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader, assert(!Record.MappingRegions.empty() && "Function has no regions"); - FunctionRecord Function(getFuncNameWithoutPrefix(Record), Record.Filenames); + StringRef OrigFuncName = Record.FunctionName; + if (!Record.Filenames.empty()) + OrigFuncName = + getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]); + FunctionRecord Function(OrigFuncName, Record.Filenames); for (const auto &Region : Record.MappingRegions) { ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count); if (!ExecutionCount) diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp index 334a3f51ec9e..a0f82a0d4ede 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp @@ -290,36 +290,25 @@ std::error_code RawCoverageMappingReader::read() { return std::error_code(); } -namespace { - -/// \brief A helper structure to access the data from a section -/// in an object file. -struct SectionData { - StringRef Data; - uint64_t Address; - - std::error_code load(SectionRef &Section) { - if (auto Err = Section.getContents(Data)) - return Err; - Address = Section.getAddress(); - return std::error_code(); - } +std::error_code InstrProfSymtab::create(SectionRef &Section) { + if (auto Err = Section.getContents(Data)) + return Err; + Address = Section.getAddress(); + return std::error_code(); +} - std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) { - if (Pointer < Address) - return coveragemap_error::malformed; - auto Offset = Pointer - Address; - if (Offset + Size > Data.size()) - return coveragemap_error::malformed; - Result = Data.substr(Pointer - Address, Size); - return std::error_code(); - } -}; +StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) { + if (Pointer < Address) + return StringRef(); + auto Offset = Pointer - Address; + if (Offset + Size > Data.size()) + return StringRef(); + return Data.substr(Pointer - Address, Size); } template <typename T, support::endianness Endian> -std::error_code readCoverageMappingData( - SectionData &ProfileNames, StringRef Data, +static std::error_code readCoverageMappingData( + InstrProfSymtab &ProfileNames, StringRef Data, std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records, std::vector<StringRef> &Filenames) { using namespace support; @@ -343,7 +332,7 @@ std::error_code readCoverageMappingData( // Skip past the function records, saving the start and end for later. const char *FunBuf = Buf; - Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t)); + Buf += NRecords * sizeof(coverage::CovMapFunctionRecord<T>); const char *FunEnd = Buf; // Get the filenames. @@ -366,12 +355,15 @@ std::error_code readCoverageMappingData( // before reading the next map. Buf += alignmentAdjustment(Buf, 8); - while (FunBuf < FunEnd) { + auto CFR = + reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf); + while ((const char *)CFR < FunEnd) { // Read the function information - T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf); - uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf); - uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf); - uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf); + T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr); + uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize); + uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize); + uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash); + CFR++; // Now use that to read the coverage data. if (CovBuf + DataSize > CovEnd) @@ -386,9 +378,9 @@ std::error_code readCoverageMappingData( continue; // Finally, grab the name and create a record. - StringRef FuncName; - if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName)) - return EC; + StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize); + if (NameSize && FuncName.empty()) + return coveragemap_error::malformed; Records.push_back(BinaryCoverageReader::ProfileMappingRecord( CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin)); @@ -401,7 +393,7 @@ std::error_code readCoverageMappingData( static const char *TestingFormatMagic = "llvmcovmtestdata"; static std::error_code loadTestingFormat(StringRef Data, - SectionData &ProfileNames, + InstrProfSymtab &ProfileNames, StringRef &CoverageMapping, uint8_t &BytesInAddress, support::endianness &Endian) { @@ -420,14 +412,14 @@ static std::error_code loadTestingFormat(StringRef Data, if (Data.size() < 1) return coveragemap_error::truncated; N = 0; - ProfileNames.Address = + uint64_t Address = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); if (N > Data.size()) return coveragemap_error::malformed; Data = Data.substr(N); if (Data.size() < ProfileNamesSize) return coveragemap_error::malformed; - ProfileNames.Data = Data.substr(0, ProfileNamesSize); + ProfileNames.create(Data.substr(0, ProfileNamesSize), Address); CoverageMapping = Data.substr(ProfileNamesSize); return std::error_code(); } @@ -443,12 +435,10 @@ static ErrorOr<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) { return coveragemap_error::no_data_found; } -static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, - SectionData &ProfileNames, - StringRef &CoverageMapping, - uint8_t &BytesInAddress, - support::endianness &Endian, - StringRef Arch) { +static std::error_code +loadBinaryFormat(MemoryBufferRef ObjectBuffer, InstrProfSymtab &ProfileNames, + StringRef &CoverageMapping, uint8_t &BytesInAddress, + support::endianness &Endian, StringRef Arch) { auto BinOrErr = object::createBinary(ObjectBuffer); if (std::error_code EC = BinOrErr.getError()) return EC; @@ -477,17 +467,18 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, : support::endianness::big; // Look for the sections that we are interested in. - auto NamesSection = lookupSection(*OF, "__llvm_prf_names"); + auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false)); if (auto EC = NamesSection.getError()) return EC; - auto CoverageSection = lookupSection(*OF, "__llvm_covmap"); + auto CoverageSection = + lookupSection(*OF, getInstrProfCoverageSectionName(false)); if (auto EC = CoverageSection.getError()) return EC; // Get the contents of the given sections. if (std::error_code EC = CoverageSection->getContents(CoverageMapping)) return EC; - if (std::error_code EC = ProfileNames.load(*NamesSection)) + if (std::error_code EC = ProfileNames.create(*NamesSection)) return EC; return std::error_code(); @@ -498,33 +489,33 @@ BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer, StringRef Arch) { std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader()); - SectionData Profile; + InstrProfSymtab ProfileNames; StringRef Coverage; uint8_t BytesInAddress; support::endianness Endian; std::error_code EC; if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) // This is a special format used for testing. - EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage, + EC = loadTestingFormat(ObjectBuffer->getBuffer(), ProfileNames, Coverage, BytesInAddress, Endian); else - EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage, - BytesInAddress, Endian, Arch); + EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), ProfileNames, + Coverage, BytesInAddress, Endian, Arch); if (EC) return EC; if (BytesInAddress == 4 && Endian == support::endianness::little) EC = readCoverageMappingData<uint32_t, support::endianness::little>( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 4 && Endian == support::endianness::big) EC = readCoverageMappingData<uint32_t, support::endianness::big>( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 8 && Endian == support::endianness::little) EC = readCoverageMappingData<uint64_t, support::endianness::little>( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 8 && Endian == support::endianness::big) EC = readCoverageMappingData<uint64_t, support::endianness::big>( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else return coveragemap_error::malformed; if (EC) diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp index 92822a71402f..f5acd23129dc 100644 --- a/contrib/llvm/lib/ProfileData/InstrProf.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -12,6 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -28,28 +32,32 @@ class InstrProfErrorCategoryType : public std::error_category { return "Success"; case instrprof_error::eof: return "End of File"; + case instrprof_error::unrecognized_format: + return "Unrecognized instrumentation profile encoding format"; case instrprof_error::bad_magic: - return "Invalid profile data (bad magic)"; + return "Invalid instrumentation profile data (bad magic)"; case instrprof_error::bad_header: - return "Invalid profile data (file header is corrupt)"; + return "Invalid instrumentation profile data (file header is corrupt)"; case instrprof_error::unsupported_version: - return "Unsupported profiling format version"; + return "Unsupported instrumentation profile format version"; case instrprof_error::unsupported_hash_type: - return "Unsupported profiling hash"; + return "Unsupported instrumentation profile hash type"; case instrprof_error::too_large: return "Too much profile data"; case instrprof_error::truncated: return "Truncated profile data"; case instrprof_error::malformed: - return "Malformed profile data"; + return "Malformed instrumentation profile data"; case instrprof_error::unknown_function: return "No profile data available for function"; case instrprof_error::hash_mismatch: - return "Function hash mismatch"; + return "Function control flow change detected (hash mismatch)"; case instrprof_error::count_mismatch: - return "Function count mismatch"; + return "Function basic block count change detected (counter mismatch)"; case instrprof_error::counter_overflow: return "Counter overflow"; + case instrprof_error::value_site_count_mismatch: + return "Function value site count change detected (counter mismatch)"; } llvm_unreachable("A value of instrprof_error has no message."); } @@ -61,3 +69,415 @@ static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory; const std::error_category &llvm::instrprof_category() { return *ErrorCategory; } + +namespace llvm { + +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version LLVM_ATTRIBUTE_UNUSED) { + + // Function names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (RawFuncName[0] == '\1') + RawFuncName = RawFuncName.substr(1); + + std::string FuncName = RawFuncName; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + // For local symbols, prepend the main file name to distinguish them. + // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. + if (FileName.empty()) + FuncName = FuncName.insert(0, "<unknown>:"); + else + FuncName = FuncName.insert(0, FileName.str() + ":"); + } + return FuncName; +} + +std::string getPGOFuncName(const Function &F, uint64_t Version) { + return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(), + Version); +} + +StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { + if (FileName.empty()) + return PGOFuncName; + // Drop the file name including ':'. See also getPGOFuncName. + if (PGOFuncName.startswith(FileName)) + PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); + return PGOFuncName; +} + +// \p FuncName is the string used as profile lookup key for the function. A +// symbol is created to hold the name. Return the legalized symbol name. +static std::string getPGOFuncNameVarName(StringRef FuncName, + GlobalValue::LinkageTypes Linkage) { + std::string VarName = getInstrProfNameVarPrefix(); + VarName += FuncName; + + if (!GlobalValue::isLocalLinkage(Linkage)) + return VarName; + + // Now fix up illegal chars in local VarName that may upset the assembler. + const char *InvalidChars = "-:<>\"'"; + size_t found = VarName.find_first_of(InvalidChars); + while (found != std::string::npos) { + VarName[found] = '_'; + found = VarName.find_first_of(InvalidChars, found + 1); + } + return VarName; +} + +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef FuncName) { + + // We generally want to match the function's linkage, but available_externally + // and extern_weak both have the wrong semantics, and anything that doesn't + // need to link across compilation units doesn't need to be visible at all. + if (Linkage == GlobalValue::ExternalWeakLinkage) + Linkage = GlobalValue::LinkOnceAnyLinkage; + else if (Linkage == GlobalValue::AvailableExternallyLinkage) + Linkage = GlobalValue::LinkOnceODRLinkage; + else if (Linkage == GlobalValue::InternalLinkage || + Linkage == GlobalValue::ExternalLinkage) + Linkage = GlobalValue::PrivateLinkage; + + auto *Value = ConstantDataArray::getString(M.getContext(), FuncName, false); + auto FuncNameVar = + new GlobalVariable(M, Value->getType(), true, Linkage, Value, + getPGOFuncNameVarName(FuncName, Linkage)); + + // Hide the symbol so that we correctly get a copy for each executable. + if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) + FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); + + return FuncNameVar; +} + +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) { + return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName); +} + +instrprof_error +InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, + uint64_t Weight) { + this->sortByTargetValues(); + Input.sortByTargetValues(); + auto I = ValueData.begin(); + auto IE = ValueData.end(); + instrprof_error Result = instrprof_error::success; + for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE; + ++J) { + while (I != IE && I->Value < J->Value) + ++I; + if (I != IE && I->Value == J->Value) { + uint64_t JCount = J->Count; + bool Overflowed; + if (Weight > 1) { + JCount = SaturatingMultiply(JCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + I->Count = SaturatingAdd(I->Count, JCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + ++I; + continue; + } + ValueData.insert(I, *J); + } + return Result; +} + +// Merge Value Profile data from Src record to this record for ValueKind. +// Scale merged value counts by \p Weight. +instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, + InstrProfRecord &Src, + uint64_t Weight) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); + if (ThisNumValueSites != OtherNumValueSites) + return instrprof_error::value_site_count_mismatch; + std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = + getValueSitesForKind(ValueKind); + std::vector<InstrProfValueSiteRecord> &OtherSiteRecords = + Src.getValueSitesForKind(ValueKind); + instrprof_error Result = instrprof_error::success; + for (uint32_t I = 0; I < ThisNumValueSites; I++) + MergeResult(Result, + ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight)); + return Result; +} + +instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, + uint64_t Weight) { + // If the number of counters doesn't match we either have bad data + // or a hash collision. + if (Counts.size() != Other.Counts.size()) + return instrprof_error::count_mismatch; + + instrprof_error Result = instrprof_error::success; + + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { + bool Overflowed; + uint64_t OtherCount = Other.Counts[I]; + if (Weight > 1) { + OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + MergeResult(Result, mergeValueProfData(Kind, Other, Weight)); + + return Result; +} + +// Map indirect call target name hash to name string. +uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, + ValueMapType *ValueMap) { + if (!ValueMap) + return Value; + switch (ValueKind) { + case IPVK_IndirectCallTarget: { + auto Result = + std::lower_bound(ValueMap->begin(), ValueMap->end(), Value, + [](const std::pair<uint64_t, uint64_t> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != ValueMap->end()) + Value = (uint64_t)Result->second; + break; + } + } + return Value; +} + +void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + ValueMapType *ValueMap) { + for (uint32_t I = 0; I < N; I++) { + VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap); + } + std::vector<InstrProfValueSiteRecord> &ValueSites = + getValueSitesForKind(ValueKind); + if (N == 0) + ValueSites.push_back(InstrProfValueSiteRecord()); + else + ValueSites.emplace_back(VData, VData + N); +} + +#define INSTR_PROF_COMMON_API_IMPL +#include "llvm/ProfileData/InstrProfData.inc" + +/*! + * \brief ValueProfRecordClosure Interface implementation for InstrProfRecord + * class. These C wrappers are used as adaptors so that C++ code can be + * invoked as callbacks. + */ +uint32_t getNumValueKindsInstrProf(const void *Record) { + return reinterpret_cast<const InstrProfRecord *>(Record)->getNumValueKinds(); +} + +uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast<const InstrProfRecord *>(Record) + ->getNumValueSites(VKind); +} + +uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast<const InstrProfRecord *>(Record) + ->getNumValueData(VKind); +} + +uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, + uint32_t S) { + return reinterpret_cast<const InstrProfRecord *>(R) + ->getNumValueDataForSite(VK, S); +} + +void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, + uint32_t K, uint32_t S, + uint64_t (*Mapper)(uint32_t, uint64_t)) { + return reinterpret_cast<const InstrProfRecord *>(R)->getValueForSite( + Dst, K, S, Mapper); +} + +ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { + ValueProfData *VD = + (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData()); + memset(VD, 0, TotalSizeInBytes); + return VD; +} + +static ValueProfRecordClosure InstrProfRecordClosure = { + 0, + getNumValueKindsInstrProf, + getNumValueSitesInstrProf, + getNumValueDataInstrProf, + getNumValueDataForSiteInstrProf, + 0, + getValueForSiteInstrProf, + allocValueProfDataInstrProf}; + +// Wrapper implementation using the closure mechanism. +uint32_t ValueProfData::getSize(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + return getValueProfDataSize(&InstrProfRecordClosure); +} + +// Wrapper implementation using the closure mechanism. +std::unique_ptr<ValueProfData> +ValueProfData::serializeFrom(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + + std::unique_ptr<ValueProfData> VPD( + serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr)); + return VPD; +} + +void ValueProfRecord::deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap) { + Record.reserveSites(Kind, NumValueSites); + + InstrProfValueData *ValueData = getValueProfRecordValueData(this); + for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) { + uint8_t ValueDataCount = this->SiteCountArray[VSite]; + Record.addValueData(Kind, VSite, ValueData, ValueDataCount, VMap); + ValueData += ValueDataCount; + } +} + +// For writing/serializing, Old is the host endianness, and New is +// byte order intended on disk. For Reading/deserialization, Old +// is the on-disk source endianness, and New is the host endianness. +void ValueProfRecord::swapBytes(support::endianness Old, + support::endianness New) { + using namespace support; + if (Old == New) + return; + + if (getHostEndianness() != Old) { + sys::swapByteOrder<uint32_t>(NumValueSites); + sys::swapByteOrder<uint32_t>(Kind); + } + uint32_t ND = getValueProfRecordNumValueData(this); + InstrProfValueData *VD = getValueProfRecordValueData(this); + + // No need to swap byte array: SiteCountArrray. + for (uint32_t I = 0; I < ND; I++) { + sys::swapByteOrder<uint64_t>(VD[I].Value); + sys::swapByteOrder<uint64_t>(VD[I].Count); + } + if (getHostEndianness() == Old) { + sys::swapByteOrder<uint32_t>(NumValueSites); + sys::swapByteOrder<uint32_t>(Kind); + } +} + +void ValueProfData::deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap) { + if (NumValueKinds == 0) + return; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + VR->deserializeTo(Record, VMap); + VR = getValueProfRecordNext(VR); + } +} + +template <class T> +static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) { + using namespace support; + if (Orig == little) + return endian::readNext<T, little, unaligned>(D); + else + return endian::readNext<T, big, unaligned>(D); +} + +static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) { + return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize)) + ValueProfData()); +} + +instrprof_error ValueProfData::checkIntegrity() { + if (NumValueKinds > IPVK_Last + 1) + return instrprof_error::malformed; + // Total size needs to be mulltiple of quadword size. + if (TotalSize % sizeof(uint64_t)) + return instrprof_error::malformed; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < this->NumValueKinds; K++) { + if (VR->Kind > IPVK_Last) + return instrprof_error::malformed; + VR = getValueProfRecordNext(VR); + if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize) + return instrprof_error::malformed; + } + return instrprof_error::success; +} + +ErrorOr<std::unique_ptr<ValueProfData>> +ValueProfData::getValueProfData(const unsigned char *D, + const unsigned char *const BufferEnd, + support::endianness Endianness) { + using namespace support; + if (D + sizeof(ValueProfData) > BufferEnd) + return instrprof_error::truncated; + + const unsigned char *Header = D; + uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness); + if (D + TotalSize > BufferEnd) + return instrprof_error::too_large; + + std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize); + memcpy(VPD.get(), D, TotalSize); + // Byte swap. + VPD->swapBytesToHost(Endianness); + + instrprof_error EC = VPD->checkIntegrity(); + if (EC != instrprof_error::success) + return EC; + + return std::move(VPD); +} + +void ValueProfData::swapBytesToHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + sys::swapByteOrder<uint32_t>(TotalSize); + sys::swapByteOrder<uint32_t>(NumValueKinds); + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + VR->swapBytes(Endianness, getHostEndianness()); + VR = getValueProfRecordNext(VR); + } +} + +void ValueProfData::swapBytesFromHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + ValueProfRecord *NVR = getValueProfRecordNext(VR); + VR->swapBytes(getHostEndianness(), Endianness); + VR = NVR; + } + sys::swapByteOrder<uint32_t>(TotalSize); + sys::swapByteOrder<uint32_t>(NumValueKinds); +} + +} diff --git a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h b/contrib/llvm/lib/ProfileData/InstrProfIndexed.h deleted file mode 100644 index ebca7b22fbfb..000000000000 --- a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h +++ /dev/null @@ -1,56 +0,0 @@ -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Shared header for the instrumented profile data reader and writer. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H - -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MD5.h" - -namespace llvm { - -namespace IndexedInstrProf { -enum class HashT : uint32_t { - MD5, - - Last = MD5 -}; - -static inline uint64_t MD5Hash(StringRef Str) { - MD5 Hash; - Hash.update(Str); - llvm::MD5::MD5Result Result; - Hash.final(Result); - // Return the least significant 8 bytes. Our MD5 implementation returns the - // result in little endian, so we may need to swap bytes. - using namespace llvm::support; - return endian::read<uint64_t, little, unaligned>(Result); -} - -static inline uint64_t ComputeHash(HashT Type, StringRef K) { - switch (Type) { - case HashT::MD5: - return IndexedInstrProf::MD5Hash(K); - } - llvm_unreachable("Unhandled hash type"); -} - -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 2; -const HashT HashType = HashT::MD5; -} - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp index 8a529a000c53..5e83456822fd 100644 --- a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfReader.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/STLExtras.h" #include <cassert> @@ -55,8 +54,10 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { Result.reset(new RawInstrProfReader64(std::move(Buffer))); else if (RawInstrProfReader32::hasFormat(*Buffer)) Result.reset(new RawInstrProfReader32(std::move(Buffer))); - else + else if (TextInstrProfReader::hasFormat(*Buffer)) Result.reset(new TextInstrProfReader(std::move(Buffer))); + else + return instrprof_error::unrecognized_format; // Initialize the reader and return the result. if (std::error_code EC = initializeReader(*Result)) @@ -98,16 +99,98 @@ void InstrProfIterator::Increment() { *this = InstrProfIterator(); } +bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { + // Verify that this really looks like plain ASCII text by checking a + // 'reasonable' number of characters (up to profile magic size). + size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); + StringRef buffer = Buffer.getBufferStart(); + return count == 0 || + std::all_of(buffer.begin(), buffer.begin() + count, + [](char c) { return ::isprint(c) || ::isspace(c); }); +} + +std::error_code TextInstrProfReader::readHeader() { + Symtab.reset(new InstrProfSymtab()); + return success(); +} + +std::error_code +TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { + +#define CHECK_LINE_END(Line) \ + if (Line.is_at_end()) \ + return error(instrprof_error::truncated); +#define READ_NUM(Str, Dst) \ + if ((Str).getAsInteger(10, (Dst))) \ + return error(instrprof_error::malformed); +#define VP_READ_ADVANCE(Val) \ + CHECK_LINE_END(Line); \ + uint32_t Val; \ + READ_NUM((*Line), (Val)); \ + Line++; + + if (Line.is_at_end()) + return success(); + + uint32_t NumValueKinds; + if (Line->getAsInteger(10, NumValueKinds)) { + // No value profile data + return success(); + } + if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) + return error(instrprof_error::malformed); + Line++; + + for (uint32_t VK = 0; VK < NumValueKinds; VK++) { + VP_READ_ADVANCE(ValueKind); + if (ValueKind > IPVK_Last) + return error(instrprof_error::malformed); + VP_READ_ADVANCE(NumValueSites); + if (!NumValueSites) + continue; + + Record.reserveSites(VK, NumValueSites); + for (uint32_t S = 0; S < NumValueSites; S++) { + VP_READ_ADVANCE(NumValueData); + + std::vector<InstrProfValueData> CurrentValues; + for (uint32_t V = 0; V < NumValueData; V++) { + CHECK_LINE_END(Line); + std::pair<StringRef, StringRef> VD = Line->split(':'); + uint64_t TakenCount, Value; + if (VK == IPVK_IndirectCallTarget) { + Symtab->addFuncName(VD.first); + Value = IndexedInstrProf::ComputeHash(VD.first); + } else { + READ_NUM(VD.first, Value); + } + READ_NUM(VD.second, TakenCount); + CurrentValues.push_back({Value, TakenCount}); + Line++; + } + Record.addValueData(VK, S, CurrentValues.data(), NumValueData, nullptr); + } + } + return success(); + +#undef CHECK_LINE_END +#undef READ_NUM +#undef VP_READ_ADVANCE +} + std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Skip empty lines and comments. while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; // If we hit EOF while looking for a name, we're done. - if (Line.is_at_end()) + if (Line.is_at_end()) { + Symtab->finalizeSymtab(); return error(instrprof_error::eof); + } // Read the function name. Record.Name = *Line++; + Symtab->addFuncName(Record.Name); // Read the function hash. if (Line.is_at_end()) @@ -136,36 +219,14 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { Record.Counts.push_back(Count); } - return success(); -} + // Check if value profile data exists and read it if so. + if (std::error_code EC = readValueProfileData(Record)) + return EC; -template <class IntPtrT> -static uint64_t getRawMagic(); - -template <> -uint64_t getRawMagic<uint64_t>() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('r') << 8 | - uint64_t(129); -} - -template <> -uint64_t getRawMagic<uint32_t>() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('R') << 8 | - uint64_t(129); + // This is needed to avoid two pass parsing because llvm-profdata + // does dumping while reading. + Symtab->finalizeSymtab(); + return success(); } template <class IntPtrT> @@ -174,19 +235,19 @@ bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { return false; uint64_t Magic = *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); - return getRawMagic<IntPtrT>() == Magic || - sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic; + return RawInstrProf::getMagic<IntPtrT>() == Magic || + sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; } template <class IntPtrT> std::error_code RawInstrProfReader<IntPtrT>::readHeader() { if (!hasFormat(*DataBuffer)) return error(instrprof_error::bad_magic); - if (DataBuffer->getBufferSize() < sizeof(RawHeader)) + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) return error(instrprof_error::bad_header); - auto *Header = - reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart()); - ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>(); + auto *Header = reinterpret_cast<const RawInstrProf::Header *>( + DataBuffer->getBufferStart()); + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); return readHeader(*Header); } @@ -202,29 +263,38 @@ RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { return instrprof_error::eof; // If there isn't enough space for another header, this is probably just // garbage at the end of the file. - if (CurrentPos + sizeof(RawHeader) > End) + if (CurrentPos + sizeof(RawInstrProf::Header) > End) return instrprof_error::malformed; // The writer ensures each profile is padded to start at an aligned address. if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>()) return instrprof_error::malformed; // The magic should have the same byte order as in the previous header. uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); - if (Magic != swap(getRawMagic<IntPtrT>())) + if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) return instrprof_error::bad_magic; // There's another profile to read, so we need to process the header. - auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos); + auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); return readHeader(*Header); } -static uint64_t getRawVersion() { - return 1; +template <class IntPtrT> +void RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { + for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { + StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize)); + Symtab.addFuncName(FunctionName); + const IntPtrT FPtr = swap(I->FunctionPointer); + if (!FPtr) + continue; + Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName)); + } + Symtab.finalizeSymtab(); } template <class IntPtrT> std::error_code -RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { - if (swap(Header.Version) != getRawVersion()) +RawInstrProfReader<IntPtrT>::readHeader(const RawInstrProf::Header &Header) { + if (swap(Header.Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); CountersDelta = swap(Header.CountersDelta); @@ -232,50 +302,69 @@ RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { auto DataSize = swap(Header.DataSize); auto CountersSize = swap(Header.CountersSize); auto NamesSize = swap(Header.NamesSize); + auto ValueDataSize = swap(Header.ValueDataSize); + ValueKindLast = swap(Header.ValueKindLast); + + auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); + auto PaddingSize = getNumPaddingBytes(NamesSize); - ptrdiff_t DataOffset = sizeof(RawHeader); - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header); + ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes; ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; - size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; + ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; + size_t ProfileSize = ValueDataOffset + ValueDataSize; auto *Start = reinterpret_cast<const char *>(&Header); if (Start + ProfileSize > DataBuffer->getBufferEnd()) return error(instrprof_error::bad_header); - Data = reinterpret_cast<const ProfileData *>(Start + DataOffset); + Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( + Start + DataOffset); DataEnd = Data + DataSize; CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); NamesStart = Start + NamesOffset; + ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); ProfileEnd = Start + ProfileSize; + std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>(); + createSymtab(*NewSymtab.get()); + Symtab = std::move(NewSymtab); return success(); } template <class IntPtrT> -std::error_code -RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { - if (Data == DataEnd) - if (std::error_code EC = readNextHeader(ProfileEnd)) - return EC; +std::error_code RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) { + Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize)); + if (Record.Name.data() < NamesStart || + Record.Name.data() + Record.Name.size() > + reinterpret_cast<const char *>(ValueDataStart)) + return error(instrprof_error::malformed); + return success(); +} + +template <class IntPtrT> +std::error_code RawInstrProfReader<IntPtrT>::readFuncHash( + InstrProfRecord &Record) { + Record.Hash = swap(Data->FuncHash); + return success(); +} - // Get the raw data. - StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); +template <class IntPtrT> +std::error_code RawInstrProfReader<IntPtrT>::readRawCounts( + InstrProfRecord &Record) { uint32_t NumCounters = swap(Data->NumCounters); + IntPtrT CounterPtr = Data->CounterPtr; if (NumCounters == 0) return error(instrprof_error::malformed); - auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters); - // Check bounds. + auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters); auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); - if (RawName.data() < NamesStart || - RawName.data() + RawName.size() > DataBuffer->getBufferEnd() || - RawCounts.data() < CountersStart || + + // Check bounds. + if (RawCounts.data() < CountersStart || RawCounts.data() + RawCounts.size() > NamesStartAsCounter) return error(instrprof_error::malformed); - // Store the data in Record, byte-swapping as necessary. - Record.Hash = swap(Data->FuncHash); - Record.Name = RawName; if (ShouldSwapBytes) { Record.Counts.clear(); Record.Counts.reserve(RawCounts.size()); @@ -284,8 +373,61 @@ RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { } else Record.Counts = RawCounts; + return success(); +} + +template <class IntPtrT> +std::error_code +RawInstrProfReader<IntPtrT>::readValueProfilingData(InstrProfRecord &Record) { + + Record.clearValueData(); + CurValueDataSize = 0; + // Need to match the logic in value profile dumper code in compiler-rt: + uint32_t NumValueKinds = 0; + for (uint32_t I = 0; I < IPVK_Last + 1; I++) + NumValueKinds += (Data->NumValueSites[I] != 0); + + if (!NumValueKinds) + return success(); + + ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr = + ValueProfData::getValueProfData(ValueDataStart, + (const unsigned char *)ProfileEnd, + getDataEndianness()); + + if (VDataPtrOrErr.getError()) + return VDataPtrOrErr.getError(); + + VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap()); + CurValueDataSize = VDataPtrOrErr.get()->getSize(); + return success(); +} + +template <class IntPtrT> +std::error_code +RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { + if (atEnd()) + if (std::error_code EC = readNextHeader(ProfileEnd)) + return EC; + + // Read name ad set it in Record. + if (std::error_code EC = readName(Record)) + return EC; + + // Read FuncHash and set it in Record. + if (std::error_code EC = readFuncHash(Record)) + return EC; + + // Read raw counts and set Record. + if (std::error_code EC = readRawCounts(Record)) + return EC; + + // Read value data and set Record. + if (std::error_code EC = readValueProfilingData(Record)) + return EC; + // Iterate. - ++Data; + advanceData(); return success(); } @@ -302,52 +444,112 @@ InstrProfLookupTrait::ComputeHash(StringRef K) { typedef InstrProfLookupTrait::data_type data_type; typedef InstrProfLookupTrait::offset_type offset_type; +bool InstrProfLookupTrait::readValueProfilingData( + const unsigned char *&D, const unsigned char *const End) { + ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr = + ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); + + if (VDataPtrOrErr.getError()) + return false; + + VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); + D += VDataPtrOrErr.get()->TotalSize; + + return true; +} + data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, offset_type N) { - // Check if the data is corrupt. If so, don't try to read it. if (N % sizeof(uint64_t)) return data_type(); DataBuffer.clear(); - uint64_t NumCounts; - uint64_t NumEntries = N / sizeof(uint64_t); std::vector<uint64_t> CounterBuffer; - for (uint64_t I = 0; I < NumEntries; I += NumCounts) { - using namespace support; - // The function hash comes first. - uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); - if (++I >= NumEntries) + using namespace support; + const unsigned char *End = D + N; + while (D < End) { + // Read hash. + if (D + sizeof(uint64_t) >= End) return data_type(); + uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); - // In v1, we have at least one count. - // Later, we have the number of counts. - NumCounts = (1 == FormatVersion) - ? NumEntries - I - : endian::readNext<uint64_t, little, unaligned>(D); - if (1 != FormatVersion) - ++I; - - // If we have more counts than data, this is bogus. - if (I + NumCounts > NumEntries) + // Initialize number of counters for FormatVersion == 1. + uint64_t CountsSize = N / sizeof(uint64_t) - 1; + // If format version is different then read the number of counters. + if (FormatVersion != 1) { + if (D + sizeof(uint64_t) > End) + return data_type(); + CountsSize = endian::readNext<uint64_t, little, unaligned>(D); + } + // Read counter values. + if (D + CountsSize * sizeof(uint64_t) > End) return data_type(); CounterBuffer.clear(); - for (unsigned J = 0; J < NumCounts; ++J) + CounterBuffer.reserve(CountsSize); + for (uint64_t J = 0; J < CountsSize; ++J) CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); - DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer))); + DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); + + // Read value profiling data. + if (FormatVersion > 2 && !readValueProfilingData(D, End)) { + DataBuffer.clear(); + return data_type(); + } } return DataBuffer; } +template <typename HashTableImpl> +std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords( + StringRef FuncName, ArrayRef<InstrProfRecord> &Data) { + auto Iter = HashTable->find(FuncName); + if (Iter == HashTable->end()) + return instrprof_error::unknown_function; + + Data = (*Iter); + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +template <typename HashTableImpl> +std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords( + ArrayRef<InstrProfRecord> &Data) { + if (atEnd()) + return instrprof_error::eof; + + Data = *RecordIterator; + + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +template <typename HashTableImpl> +InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( + const unsigned char *Buckets, const unsigned char *const Payload, + const unsigned char *const Base, IndexedInstrProf::HashT HashType, + uint64_t Version) { + FormatVersion = Version; + HashTable.reset(HashTableImpl::Create( + Buckets, Payload, Base, + typename HashTableImpl::InfoType(HashType, Version))); + RecordIterator = HashTable->data_begin(); +} + bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < 8) return false; using namespace support; uint64_t Magic = endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); + // Verify that it's magical. return Magic == IndexedInstrProf::Magic; } @@ -360,71 +562,91 @@ std::error_code IndexedInstrProfReader::readHeader() { using namespace support; + auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); + Cur += sizeof(IndexedInstrProf::Header); + // Check the magic number. - uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur); + uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); if (Magic != IndexedInstrProf::Magic) return error(instrprof_error::bad_magic); // Read the version. - FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur); + uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); if (FormatVersion > IndexedInstrProf::Version) return error(instrprof_error::unsupported_version); // Read the maximal function count. - MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur); + MaxFunctionCount = + endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( - endian::readNext<uint64_t, little, unaligned>(Cur)); + endian::byte_swap<uint64_t, little>(Header->HashType)); if (HashType > IndexedInstrProf::HashT::Last) return error(instrprof_error::unsupported_hash_type); - uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur); - // The rest of the file is an on disk hash table. - Index.reset(InstrProfReaderIndex::Create( - Start + HashOffset, Cur, Start, - InstrProfLookupTrait(HashType, FormatVersion))); - // Set up our iterator for readNextRecord. - RecordIterator = Index->data_begin(); + uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); + // The rest of the file is an on disk hash table. + InstrProfReaderIndexBase *IndexPtr = nullptr; + IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>( + Start + HashOffset, Cur, Start, HashType, FormatVersion); + Index.reset(IndexPtr); return success(); } -std::error_code IndexedInstrProfReader::getFunctionCounts( - StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) { - auto Iter = Index->find(FuncName); - if (Iter == Index->end()) - return error(instrprof_error::unknown_function); +InstrProfSymtab &IndexedInstrProfReader::getSymtab() { + if (Symtab.get()) + return *Symtab.get(); - // Found it. Look for counters with the right hash. - ArrayRef<InstrProfRecord> Data = (*Iter); - if (Data.empty()) - return error(instrprof_error::malformed); + std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>(); + Index->populateSymtab(*NewSymtab.get()); + + Symtab = std::move(NewSymtab); + return *Symtab.get(); +} +ErrorOr<InstrProfRecord> +IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash) { + ArrayRef<InstrProfRecord> Data; + std::error_code EC = Index->getRecords(FuncName, Data); + if (EC != instrprof_error::success) + return EC; + // Found it. Look for counters with the right hash. for (unsigned I = 0, E = Data.size(); I < E; ++I) { // Check for a match and fill the vector if there is one. if (Data[I].Hash == FuncHash) { - Counts = Data[I].Counts; - return success(); + return std::move(Data[I]); } } return error(instrprof_error::hash_mismatch); } std::error_code -IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { - // Are we out of records? - if (RecordIterator == Index->data_end()) - return error(instrprof_error::eof); +IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, + std::vector<uint64_t> &Counts) { + ErrorOr<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); + if (std::error_code EC = Record.getError()) + return EC; - if ((*RecordIterator).empty()) - return error(instrprof_error::malformed); + Counts = Record.get().Counts; + return success(); +} +std::error_code IndexedInstrProfReader::readNextRecord( + InstrProfRecord &Record) { static unsigned RecordIndex = 0; - ArrayRef<InstrProfRecord> Data = (*RecordIterator); + + ArrayRef<InstrProfRecord> Data; + + std::error_code EC = Index->getRecords(Data); + if (EC != instrprof_error::success) + return error(EC); + Record = Data[RecordIndex++]; if (RecordIndex >= Data.size()) { - ++RecordIterator; + Index->advanceToNextKey(); RecordIndex = 0; } return success(); diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp index 2188543ed61c..9bb03e1e77a3 100644 --- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -13,27 +13,29 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfWriter.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/OnDiskHashTable.h" +#include <tuple> using namespace llvm; namespace { +static support::endianness ValueProfDataEndianness = support::little; + class InstrProfRecordTrait { public: typedef StringRef key_type; typedef StringRef key_type_ref; - typedef const InstrProfWriter::CounterData *const data_type; - typedef const InstrProfWriter::CounterData *const data_type_ref; + typedef const InstrProfWriter::ProfilingData *const data_type; + typedef const InstrProfWriter::ProfilingData *const data_type_ref; typedef uint64_t hash_value_type; typedef uint64_t offset_type; static hash_value_type ComputeHash(key_type_ref K) { - return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K); + return IndexedInstrProf::ComputeHash(K); } static std::pair<offset_type, offset_type> @@ -45,8 +47,15 @@ public: LE.write<offset_type>(N); offset_type M = 0; - for (const auto &Counts : *V) - M += (2 + Counts.second.size()) * sizeof(uint64_t); + for (const auto &ProfileData : *V) { + const InstrProfRecord &ProfRecord = ProfileData.second; + M += sizeof(uint64_t); // The function hash + M += sizeof(uint64_t); // The size of the Counts vector + M += ProfRecord.Counts.size() * sizeof(uint64_t); + + // Value data + M += ValueProfData::getSize(ProfileData.second); + } LE.write<offset_type>(M); return std::make_pair(N, M); @@ -60,50 +69,68 @@ public: offset_type) { using namespace llvm::support; endian::Writer<little> LE(Out); + for (const auto &ProfileData : *V) { + const InstrProfRecord &ProfRecord = ProfileData.second; - for (const auto &Counts : *V) { - LE.write<uint64_t>(Counts.first); - LE.write<uint64_t>(Counts.second.size()); - for (uint64_t I : Counts.second) + LE.write<uint64_t>(ProfileData.first); // Function hash + LE.write<uint64_t>(ProfRecord.Counts.size()); + for (uint64_t I : ProfRecord.Counts) LE.write<uint64_t>(I); + + // Write value data + std::unique_ptr<ValueProfData> VDataPtr = + ValueProfData::serializeFrom(ProfileData.second); + uint32_t S = VDataPtr->getSize(); + VDataPtr->swapBytesFromHost(ValueProfDataEndianness); + Out.write((const char *)VDataPtr.get(), S); } } }; } -std::error_code -InstrProfWriter::addFunctionCounts(StringRef FunctionName, - uint64_t FunctionHash, - ArrayRef<uint64_t> Counters) { - auto &CounterData = FunctionData[FunctionName]; +// Internal interface for testing purpose only. +void InstrProfWriter::setValueProfDataEndianness( + support::endianness Endianness) { + ValueProfDataEndianness = Endianness; +} + +std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, + uint64_t Weight) { + auto &ProfileDataMap = FunctionData[I.Name]; + + bool NewFunc; + ProfilingData::iterator Where; + std::tie(Where, NewFunc) = + ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); + InstrProfRecord &Dest = Where->second; - auto Where = CounterData.find(FunctionHash); - if (Where == CounterData.end()) { + instrprof_error Result; + if (NewFunc) { // We've never seen a function with this name and hash, add it. - CounterData[FunctionHash] = Counters; - // We keep track of the max function count as we go for simplicity. - if (Counters[0] > MaxFunctionCount) - MaxFunctionCount = Counters[0]; - return instrprof_error::success; + Dest = std::move(I); + // Fix up the name to avoid dangling reference. + Dest.Name = FunctionData.find(Dest.Name)->getKey(); + Result = instrprof_error::success; + if (Weight > 1) { + for (auto &Count : Dest.Counts) { + bool Overflowed; + Count = SaturatingMultiply(Count, Weight, &Overflowed); + if (Overflowed && Result == instrprof_error::success) { + Result = instrprof_error::counter_overflow; + } + } + } + } else { + // We're updating a function we've seen before. + Result = Dest.merge(I, Weight); } - // We're updating a function we've seen before. - auto &FoundCounters = Where->second; - // If the number of counters doesn't match we either have bad data or a hash - // collision. - if (FoundCounters.size() != Counters.size()) - return instrprof_error::count_mismatch; - - for (size_t I = 0, E = Counters.size(); I < E; ++I) { - if (FoundCounters[I] + Counters[I] < FoundCounters[I]) - return instrprof_error::counter_overflow; - FoundCounters[I] += Counters[I]; - } // We keep track of the max function count as we go for simplicity. - if (FoundCounters[0] > MaxFunctionCount) - MaxFunctionCount = FoundCounters[0]; + // Update this statistic no matter the result of the merge. + if (Dest.Counts[0] > MaxFunctionCount) + MaxFunctionCount = Dest.Counts[0]; - return instrprof_error::success; + return Result; } std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) { @@ -117,13 +144,23 @@ std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) { endian::Writer<little> LE(OS); // Write the header. - LE.write<uint64_t>(IndexedInstrProf::Magic); - LE.write<uint64_t>(IndexedInstrProf::Version); - LE.write<uint64_t>(MaxFunctionCount); - LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType)); + IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; + Header.Version = IndexedInstrProf::Version; + Header.MaxFunctionCount = MaxFunctionCount; + Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType); + Header.HashOffset = 0; + int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); + + // Only write out all the fields execpt 'HashOffset'. We need + // to remember the offset of that field to allow back patching + // later. + for (int I = 0; I < N - 1; I++) + LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]); // Save a space to write the hash table start location. uint64_t HashTableStartLoc = OS.tell(); + // Reserve the space for HashOffset field. LE.write<uint64_t>(0); // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS); @@ -138,9 +175,65 @@ void InstrProfWriter::write(raw_fd_ostream &OS) { // Go back and fill in the hash table start. using namespace support; OS.seek(TableStart.first); + // Now patch the HashOffset field previously reserved. endian::Writer<little>(OS).write<uint64_t>(TableStart.second); } +static const char *ValueProfKindStr[] = { +#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, + InstrProfSymtab &Symtab, + raw_fd_ostream &OS) { + OS << Func.Name << "\n"; + OS << "# Func Hash:\n" << Func.Hash << "\n"; + OS << "# Num Counters:\n" << Func.Counts.size() << "\n"; + OS << "# Counter Values:\n"; + for (uint64_t Count : Func.Counts) + OS << Count << "\n"; + + uint32_t NumValueKinds = Func.getNumValueKinds(); + if (!NumValueKinds) { + OS << "\n"; + return; + } + + OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n"; + for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) { + uint32_t NS = Func.getNumValueSites(VK); + if (!NS) + continue; + OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n"; + OS << "# NumValueSites:\n" << NS << "\n"; + for (uint32_t S = 0; S < NS; S++) { + uint32_t ND = Func.getNumValueDataForSite(VK, S); + OS << ND << "\n"; + std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S); + for (uint32_t I = 0; I < ND; I++) { + if (VK == IPVK_IndirectCallTarget) + OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n"; + else + OS << VD[I].Value << ":" << VD[I].Count << "\n"; + } + } + } + + OS << "\n"; +} + +void InstrProfWriter::writeText(raw_fd_ostream &OS) { + InstrProfSymtab Symtab; + for (const auto &I : FunctionData) + Symtab.addFuncName(I.getKey()); + Symtab.finalizeSymtab(); + + for (const auto &I : FunctionData) + for (const auto &Func : I.getValue()) + writeRecordInText(Func.second, Symtab, OS); +} + std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() { std::string Data; llvm::raw_string_ostream OS(Data); diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp index 920c48a24640..9ded757f2b28 100644 --- a/contrib/llvm/lib/ProfileData/SampleProf.cpp +++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +using namespace llvm::sampleprof; using namespace llvm; namespace { @@ -27,17 +28,25 @@ class SampleProfErrorCategoryType : public std::error_category { case sampleprof_error::success: return "Success"; case sampleprof_error::bad_magic: - return "Invalid file format (bad magic)"; + return "Invalid sample profile data (bad magic)"; case sampleprof_error::unsupported_version: - return "Unsupported format version"; + return "Unsupported sample profile format version"; case sampleprof_error::too_large: return "Too much profile data"; case sampleprof_error::truncated: return "Truncated profile data"; case sampleprof_error::malformed: - return "Malformed profile data"; + return "Malformed sample profile data"; case sampleprof_error::unrecognized_format: - return "Unrecognized profile encoding format"; + return "Unrecognized sample profile encoding format"; + case sampleprof_error::unsupported_writing_format: + return "Profile encoding format unsupported for writing operations"; + case sampleprof_error::truncated_name_table: + return "Truncated function name table"; + case sampleprof_error::not_implemented: + return "Unimplemented feature"; + case sampleprof_error::counter_overflow: + return "Counter overflow"; } llvm_unreachable("A value of sampleprof_error has no message."); } @@ -49,3 +58,92 @@ static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory; const std::error_category &llvm::sampleprof_category() { return *ErrorCategory; } + +void LineLocation::print(raw_ostream &OS) const { + OS << LineOffset; + if (Discriminator > 0) + OS << "." << Discriminator; +} + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const LineLocation &Loc) { + Loc.print(OS); + return OS; +} + +void LineLocation::dump() const { print(dbgs()); } + +void CallsiteLocation::print(raw_ostream &OS) const { + LineLocation::print(OS); + OS << ": inlined callee: " << CalleeName; +} + +void CallsiteLocation::dump() const { print(dbgs()); } + +inline raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const CallsiteLocation &Loc) { + Loc.print(OS); + return OS; +} + +/// \brief Print the sample record to the stream \p OS indented by \p Indent. +void SampleRecord::print(raw_ostream &OS, unsigned Indent) const { + OS << NumSamples; + if (hasCalls()) { + OS << ", calls:"; + for (const auto &I : getCallTargets()) + OS << " " << I.first() << ":" << I.second; + } + OS << "\n"; +} + +void SampleRecord::dump() const { print(dbgs(), 0); } + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const SampleRecord &Sample) { + Sample.print(OS, 0); + return OS; +} + +/// \brief Print the samples collected for a function on stream \p OS. +void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { + OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() + << " sampled lines\n"; + + OS.indent(Indent); + if (BodySamples.size() > 0) { + OS << "Samples collected in the function's body {\n"; + SampleSorter<LineLocation, SampleRecord> SortedBodySamples(BodySamples); + for (const auto &SI : SortedBodySamples.get()) { + OS.indent(Indent + 2); + OS << SI->first << ": " << SI->second; + } + OS.indent(Indent); + OS << "}\n"; + } else { + OS << "No samples collected in the function's body\n"; + } + + OS.indent(Indent); + if (CallsiteSamples.size() > 0) { + OS << "Samples collected in inlined callsites {\n"; + SampleSorter<CallsiteLocation, FunctionSamples> SortedCallsiteSamples( + CallsiteSamples); + for (const auto &CS : SortedCallsiteSamples.get()) { + OS.indent(Indent + 2); + OS << CS->first << ": "; + CS->second.print(OS, Indent + 4); + } + OS << "}\n"; + } else { + OS << "No inlined callsites in this function\n"; + } +} + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const FunctionSamples &FS) { + FS.print(OS); + return OS; +} + +void FunctionSamples::dump(void) const { print(dbgs(), 0); } diff --git a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp index b39bfd6e2ecd..93cd87bb82f8 100644 --- a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp @@ -8,133 +8,37 @@ //===----------------------------------------------------------------------===// // // This file implements the class that reads LLVM sample profiles. It -// supports two file formats: text and binary. The textual representation -// is useful for debugging and testing purposes. The binary representation -// is more compact, resulting in smaller file sizes. However, they can -// both be used interchangeably. +// supports three file formats: text, binary and gcov. // -// NOTE: If you are making changes to the file format, please remember -// to document them in the Clang documentation at -// tools/clang/docs/UsersManual.rst. +// The textual representation is useful for debugging and testing purposes. The +// binary representation is more compact, resulting in smaller file sizes. // -// Text format -// ----------- +// The gcov encoding is the one generated by GCC's AutoFDO profile creation +// tool (https://github.com/google/autofdo) // -// Sample profiles are written as ASCII text. The file is divided into -// sections, which correspond to each of the functions executed at runtime. -// Each section has the following format -// -// function1:total_samples:total_head_samples -// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] -// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] -// ... -// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] -// -// The file may contain blank lines between sections and within a -// section. However, the spacing within a single line is fixed. Additional -// spaces will result in an error while reading the file. -// -// Function names must be mangled in order for the profile loader to -// match them in the current translation unit. The two numbers in the -// function header specify how many total samples were accumulated in the -// function (first number), and the total number of samples accumulated -// in the prologue of the function (second number). This head sample -// count provides an indicator of how frequently the function is invoked. -// -// Each sampled line may contain several items. Some are optional (marked -// below): -// -// a. Source line offset. This number represents the line number -// in the function where the sample was collected. The line number is -// always relative to the line where symbol of the function is -// defined. So, if the function has its header at line 280, the offset -// 13 is at line 293 in the file. -// -// Note that this offset should never be a negative number. This could -// happen in cases like macros. The debug machinery will register the -// line number at the point of macro expansion. So, if the macro was -// expanded in a line before the start of the function, the profile -// converter should emit a 0 as the offset (this means that the optimizers -// will not be able to associate a meaningful weight to the instructions -// in the macro). -// -// b. [OPTIONAL] Discriminator. This is used if the sampled program -// was compiled with DWARF discriminator support -// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). -// DWARF discriminators are unsigned integer values that allow the -// compiler to distinguish between multiple execution paths on the -// same source line location. -// -// For example, consider the line of code ``if (cond) foo(); else bar();``. -// If the predicate ``cond`` is true 80% of the time, then the edge -// into function ``foo`` should be considered to be taken most of the -// time. But both calls to ``foo`` and ``bar`` are at the same source -// line, so a sample count at that line is not sufficient. The -// compiler needs to know which part of that line is taken more -// frequently. -// -// This is what discriminators provide. In this case, the calls to -// ``foo`` and ``bar`` will be at the same line, but will have -// different discriminator values. This allows the compiler to correctly -// set edge weights into ``foo`` and ``bar``. -// -// c. Number of samples. This is an integer quantity representing the -// number of samples collected by the profiler at this source -// location. -// -// d. [OPTIONAL] Potential call targets and samples. If present, this -// line contains a call instruction. This models both direct and -// number of samples. For example, -// -// 130: 7 foo:3 bar:2 baz:7 -// -// The above means that at relative line offset 130 there is a call -// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, -// with ``baz()`` being the relatively more frequently called target. +// All three encodings can be used interchangeably as an input sample profile. // //===----------------------------------------------------------------------===// #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" using namespace llvm::sampleprof; using namespace llvm; -/// \brief Print the samples collected for a function on stream \p OS. -/// -/// \param OS Stream to emit the output to. -void FunctionSamples::print(raw_ostream &OS) { - OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() - << " sampled lines\n"; - for (const auto &SI : BodySamples) { - LineLocation Loc = SI.first; - const SampleRecord &Sample = SI.second; - OS << "\tline offset: " << Loc.LineOffset - << ", discriminator: " << Loc.Discriminator - << ", number of samples: " << Sample.getSamples(); - if (Sample.hasCalls()) { - OS << ", calls:"; - for (const auto &I : Sample.getCallTargets()) - OS << " " << I.first() << ":" << I.second; - } - OS << "\n"; - } - OS << "\n"; -} - /// \brief Dump the function profile for \p FName. /// /// \param FName Name of the function to print. /// \param OS Stream to emit the output to. void SampleProfileReader::dumpFunctionProfile(StringRef FName, raw_ostream &OS) { - OS << "Function: " << FName << ": "; - Profiles[FName].print(OS); + OS << "Function: " << FName << ": " << Profiles[FName]; } /// \brief Dump all the function profiles found on stream \p OS. @@ -143,6 +47,102 @@ void SampleProfileReader::dump(raw_ostream &OS) { dumpFunctionProfile(I.getKey(), OS); } +/// \brief Parse \p Input as function head. +/// +/// Parse one line of \p Input, and update function name in \p FName, +/// function's total sample count in \p NumSamples, function's entry +/// count in \p NumHeadSamples. +/// +/// \returns true if parsing is successful. +static bool ParseHead(const StringRef &Input, StringRef &FName, + uint64_t &NumSamples, uint64_t &NumHeadSamples) { + if (Input[0] == ' ') + return false; + size_t n2 = Input.rfind(':'); + size_t n1 = Input.rfind(':', n2 - 1); + FName = Input.substr(0, n1); + if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) + return false; + if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) + return false; + return true; +} + + +/// \brief Returns true if line offset \p L is legal (only has 16 bits). +static bool isOffsetLegal(unsigned L) { + return (L & 0xffff) == L; +} + +/// \brief Parse \p Input as line sample. +/// +/// \param Input input line. +/// \param IsCallsite true if the line represents an inlined callsite. +/// \param Depth the depth of the inline stack. +/// \param NumSamples total samples of the line/inlined callsite. +/// \param LineOffset line offset to the start of the function. +/// \param Discriminator discriminator of the line. +/// \param TargetCountMap map from indirect call target to count. +/// +/// returns true if parsing is successful. +static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, + uint64_t &NumSamples, uint32_t &LineOffset, + uint32_t &Discriminator, StringRef &CalleeName, + DenseMap<StringRef, uint64_t> &TargetCountMap) { + for (Depth = 0; Input[Depth] == ' '; Depth++) + ; + if (Depth == 0) + return false; + + size_t n1 = Input.find(':'); + StringRef Loc = Input.substr(Depth, n1 - Depth); + size_t n2 = Loc.find('.'); + if (n2 == StringRef::npos) { + if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) + return false; + Discriminator = 0; + } else { + if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) + return false; + if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) + return false; + } + + StringRef Rest = Input.substr(n1 + 2); + if (Rest[0] >= '0' && Rest[0] <= '9') { + IsCallsite = false; + size_t n3 = Rest.find(' '); + if (n3 == StringRef::npos) { + if (Rest.getAsInteger(10, NumSamples)) + return false; + } else { + if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) + return false; + } + while (n3 != StringRef::npos) { + n3 += Rest.substr(n3).find_first_not_of(' '); + Rest = Rest.substr(n3); + n3 = Rest.find(' '); + StringRef pair = Rest; + if (n3 != StringRef::npos) { + pair = Rest.substr(0, n3); + } + size_t n4 = pair.find(':'); + uint64_t count; + if (pair.substr(n4 + 1).getAsInteger(10, count)) + return false; + TargetCountMap[pair.substr(0, n4)] = count; + } + } else { + IsCallsite = true; + size_t n3 = Rest.find_last_of(':'); + CalleeName = Rest.substr(0, n3); + if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) + return false; + } + return true; +} + /// \brief Load samples from a text file. /// /// See the documentation at the top of the file for an explanation of @@ -151,14 +151,13 @@ void SampleProfileReader::dump(raw_ostream &OS) { /// \returns true if the file was loaded successfully, false otherwise. std::error_code SampleProfileReaderText::read() { line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); + sampleprof_error Result = sampleprof_error::success; + + InlineCallStack InlineStack; - // Read the profile of each function. Since each function may be - // mentioned more than once, and we are collecting flat profiles, - // accumulate samples as we parse them. - Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); - Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); - Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)"); - while (!LineIt.is_at_eof()) { + for (; !LineIt.is_at_eof(); ++LineIt) { + if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') + continue; // Read the header of each function. // // Note that for function identifiers we are actually expecting @@ -171,63 +170,74 @@ std::error_code SampleProfileReaderText::read() { // // The only requirement we place on the identifier, then, is that it // should not begin with a number. - SmallVector<StringRef, 4> Matches; - if (!HeadRE.match(*LineIt, &Matches)) { - reportParseError(LineIt.line_number(), - "Expected 'mangled_name:NUM:NUM', found " + *LineIt); - return sampleprof_error::malformed; - } - assert(Matches.size() == 4); - StringRef FName = Matches[1]; - unsigned NumSamples, NumHeadSamples; - Matches[2].getAsInteger(10, NumSamples); - Matches[3].getAsInteger(10, NumHeadSamples); - Profiles[FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[FName]; - FProfile.addTotalSamples(NumSamples); - FProfile.addHeadSamples(NumHeadSamples); - ++LineIt; - - // Now read the body. The body of the function ends when we reach - // EOF or when we see the start of the next function. - while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { - if (!LineSampleRE.match(*LineIt, &Matches)) { - reportParseError( - LineIt.line_number(), - "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { + reportError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); + return sampleprof_error::malformed; + } + Profiles[FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FName]; + MergeResult(Result, FProfile.addTotalSamples(NumSamples)); + MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); + InlineStack.clear(); + InlineStack.push_back(&FProfile); + } else { + uint64_t NumSamples; + StringRef FName; + DenseMap<StringRef, uint64_t> TargetCountMap; + bool IsCallsite; + uint32_t Depth, LineOffset, Discriminator; + if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset, + Discriminator, FName, TargetCountMap)) { + reportError(LineIt.line_number(), + "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + + *LineIt); return sampleprof_error::malformed; } - assert(Matches.size() == 5); - unsigned LineOffset, NumSamples, Discriminator = 0; - Matches[1].getAsInteger(10, LineOffset); - if (Matches[2] != "") - Matches[2].getAsInteger(10, Discriminator); - Matches[3].getAsInteger(10, NumSamples); - - // If there are function calls in this line, generate a call sample - // entry for each call. - std::string CallsLine(Matches[4]); - while (CallsLine != "") { - SmallVector<StringRef, 3> CallSample; - if (!CallSampleRE.match(CallsLine, &CallSample)) { - reportParseError(LineIt.line_number(), - "Expected 'mangled_name:NUM', found " + CallsLine); - return sampleprof_error::malformed; + if (IsCallsite) { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); } - StringRef CalledFunction = CallSample[1]; - unsigned CalledFunctionSamples; - CallSample[2].getAsInteger(10, CalledFunctionSamples); - FProfile.addCalledTargetSamples(LineOffset, Discriminator, - CalledFunction, CalledFunctionSamples); - CallsLine = CallSampleRE.sub("", CallsLine); + FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, FName)); + MergeResult(Result, FSamples.addTotalSamples(NumSamples)); + InlineStack.push_back(&FSamples); + } else { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FProfile = *InlineStack.back(); + for (const auto &name_count : TargetCountMap) { + MergeResult(Result, FProfile.addCalledTargetSamples( + LineOffset, Discriminator, name_count.first, + name_count.second)); + } + MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, + NumSamples)); } + } + } - FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); - ++LineIt; + return Result; +} + +bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { + bool result = false; + + // Check that the first non-comment line is a valid function header. + line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); + if (!LineIt.is_at_eof()) { + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); } } - return sampleprof_error::success; + return result; } template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { @@ -243,7 +253,7 @@ template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { EC = sampleprof_error::success; if (EC) { - reportParseError(0, EC.message()); + reportError(0, EC.message()); return EC; } @@ -256,7 +266,7 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() { StringRef Str(reinterpret_cast<const char *>(Data)); if (Data + Str.size() + 1 > End) { EC = sampleprof_error::truncated; - reportParseError(0, EC.message()); + reportError(0, EC.message()); return EC; } @@ -264,62 +274,109 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() { return Str; } -std::error_code SampleProfileReaderBinary::read() { - while (!at_eof()) { - auto FName(readString()); - if (std::error_code EC = FName.getError()) +ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() { + std::error_code EC; + auto Idx = readNumber<uint32_t>(); + if (std::error_code EC = Idx.getError()) + return EC; + if (*Idx >= NameTable.size()) + return sampleprof_error::truncated_name_table; + return NameTable[*Idx]; +} + +std::error_code +SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { + auto NumSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumSamples.getError()) + return EC; + FProfile.addTotalSamples(*NumSamples); + + // Read the samples in the body. + auto NumRecords = readNumber<uint32_t>(); + if (std::error_code EC = NumRecords.getError()) + return EC; + + for (uint32_t I = 0; I < *NumRecords; ++I) { + auto LineOffset = readNumber<uint64_t>(); + if (std::error_code EC = LineOffset.getError()) return EC; - Profiles[*FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[*FName]; + if (!isOffsetLegal(*LineOffset)) { + return std::error_code(); + } - auto Val = readNumber<unsigned>(); - if (std::error_code EC = Val.getError()) + auto Discriminator = readNumber<uint64_t>(); + if (std::error_code EC = Discriminator.getError()) return EC; - FProfile.addTotalSamples(*Val); - Val = readNumber<unsigned>(); - if (std::error_code EC = Val.getError()) + auto NumSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumSamples.getError()) return EC; - FProfile.addHeadSamples(*Val); - // Read the samples in the body. - auto NumRecords = readNumber<unsigned>(); - if (std::error_code EC = NumRecords.getError()) + auto NumCalls = readNumber<uint32_t>(); + if (std::error_code EC = NumCalls.getError()) return EC; - for (unsigned I = 0; I < *NumRecords; ++I) { - auto LineOffset = readNumber<uint64_t>(); - if (std::error_code EC = LineOffset.getError()) - return EC; - auto Discriminator = readNumber<uint64_t>(); - if (std::error_code EC = Discriminator.getError()) + for (uint32_t J = 0; J < *NumCalls; ++J) { + auto CalledFunction(readStringFromTable()); + if (std::error_code EC = CalledFunction.getError()) return EC; - auto NumSamples = readNumber<uint64_t>(); - if (std::error_code EC = NumSamples.getError()) + auto CalledFunctionSamples = readNumber<uint64_t>(); + if (std::error_code EC = CalledFunctionSamples.getError()) return EC; - auto NumCalls = readNumber<unsigned>(); - if (std::error_code EC = NumCalls.getError()) - return EC; + FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + *CalledFunction, *CalledFunctionSamples); + } + + FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + } - for (unsigned J = 0; J < *NumCalls; ++J) { - auto CalledFunction(readString()); - if (std::error_code EC = CalledFunction.getError()) - return EC; + // Read all the samples for inlined function calls. + auto NumCallsites = readNumber<uint32_t>(); + if (std::error_code EC = NumCallsites.getError()) + return EC; - auto CalledFunctionSamples = readNumber<uint64_t>(); - if (std::error_code EC = CalledFunctionSamples.getError()) - return EC; + for (uint32_t J = 0; J < *NumCallsites; ++J) { + auto LineOffset = readNumber<uint64_t>(); + if (std::error_code EC = LineOffset.getError()) + return EC; - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, - *CalledFunction, - *CalledFunctionSamples); - } + auto Discriminator = readNumber<uint64_t>(); + if (std::error_code EC = Discriminator.getError()) + return EC; - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); - } + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( + CallsiteLocation(*LineOffset, *Discriminator, *FName)); + if (std::error_code EC = readProfile(CalleeProfile)) + return EC; + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::read() { + while (!at_eof()) { + auto NumHeadSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumHeadSamples.getError()) + return EC; + + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + Profiles[*FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[*FName]; + + FProfile.addHeadSamples(*NumHeadSamples); + + if (std::error_code EC = readProfile(FProfile)) + return EC; } return sampleprof_error::success; @@ -343,6 +400,18 @@ std::error_code SampleProfileReaderBinary::readHeader() { else if (*Version != SPVersion()) return sampleprof_error::unsupported_version; + // Read the name table. + auto Size = readNumber<uint32_t>(); + if (std::error_code EC = Size.getError()) + return EC; + NameTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto Name(readString()); + if (std::error_code EC = Name.getError()) + return EC; + NameTable.push_back(*Name); + } + return sampleprof_error::success; } @@ -353,6 +422,249 @@ bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) { return Magic == SPMagic(); } +std::error_code SampleProfileReaderGCC::skipNextWord() { + uint32_t dummy; + if (!GcovBuffer.readInt(dummy)) + return sampleprof_error::truncated; + return sampleprof_error::success; +} + +template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { + if (sizeof(T) <= sizeof(uint32_t)) { + uint32_t Val; + if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) + return static_cast<T>(Val); + } else if (sizeof(T) <= sizeof(uint64_t)) { + uint64_t Val; + if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) + return static_cast<T>(Val); + } + + std::error_code EC = sampleprof_error::malformed; + reportError(0, EC.message()); + return EC; +} + +ErrorOr<StringRef> SampleProfileReaderGCC::readString() { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + return Str; +} + +std::error_code SampleProfileReaderGCC::readHeader() { + // Read the magic identifier. + if (!GcovBuffer.readGCDAFormat()) + return sampleprof_error::unrecognized_format; + + // Read the version number. Note - the GCC reader does not validate this + // version, but the profile creator generates v704. + GCOV::GCOVVersion version; + if (!GcovBuffer.readGCOVVersion(version)) + return sampleprof_error::unrecognized_format; + + if (version != GCOV::V704) + return sampleprof_error::unsupported_version; + + // Skip the empty integer. + if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { + uint32_t Tag; + if (!GcovBuffer.readInt(Tag)) + return sampleprof_error::truncated; + + if (Tag != Expected) + return sampleprof_error::malformed; + + if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readNameTable() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) + return EC; + + uint32_t Size; + if (!GcovBuffer.readInt(Size)) + return sampleprof_error::truncated; + + for (uint32_t I = 0; I < Size; ++I) { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + Names.push_back(Str); + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readFunctionProfiles() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) + return EC; + + uint32_t NumFunctions; + if (!GcovBuffer.readInt(NumFunctions)) + return sampleprof_error::truncated; + + InlineCallStack Stack; + for (uint32_t I = 0; I < NumFunctions; ++I) + if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readOneFunctionProfile( + const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { + uint64_t HeadCount = 0; + if (InlineStack.size() == 0) + if (!GcovBuffer.readInt64(HeadCount)) + return sampleprof_error::truncated; + + uint32_t NameIdx; + if (!GcovBuffer.readInt(NameIdx)) + return sampleprof_error::truncated; + + StringRef Name(Names[NameIdx]); + + uint32_t NumPosCounts; + if (!GcovBuffer.readInt(NumPosCounts)) + return sampleprof_error::truncated; + + uint32_t NumCallsites; + if (!GcovBuffer.readInt(NumCallsites)) + return sampleprof_error::truncated; + + FunctionSamples *FProfile = nullptr; + if (InlineStack.size() == 0) { + // If this is a top function that we have already processed, do not + // update its profile again. This happens in the presence of + // function aliases. Since these aliases share the same function + // body, there will be identical replicated profiles for the + // original function. In this case, we simply not bother updating + // the profile of the original function. + FProfile = &Profiles[Name]; + FProfile->addHeadSamples(HeadCount); + if (FProfile->getTotalSamples() > 0) + Update = false; + } else { + // Otherwise, we are reading an inlined instance. The top of the + // inline stack contains the profile of the caller. Insert this + // callee in the caller's CallsiteMap. + FunctionSamples *CallerProfile = InlineStack.front(); + uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + FProfile = &CallerProfile->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, Name)); + } + + for (uint32_t I = 0; I < NumPosCounts; ++I) { + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + + uint32_t NumTargets; + if (!GcovBuffer.readInt(NumTargets)) + return sampleprof_error::truncated; + + uint64_t Count; + if (!GcovBuffer.readInt64(Count)) + return sampleprof_error::truncated; + + // The line location is encoded in the offset as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator. + uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (Update) { + // Walk up the inline stack, adding the samples on this line to + // the total sample count of the callers in the chain. + for (auto CallerProfile : NewStack) + CallerProfile->addTotalSamples(Count); + + // Update the body samples for the current profile. + FProfile->addBodySamples(LineOffset, Discriminator, Count); + } + + // Process the list of functions called at an indirect call site. + // These are all the targets that a function pointer (or virtual + // function) resolved at runtime. + for (uint32_t J = 0; J < NumTargets; J++) { + uint32_t HistVal; + if (!GcovBuffer.readInt(HistVal)) + return sampleprof_error::truncated; + + if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) + return sampleprof_error::malformed; + + uint64_t TargetIdx; + if (!GcovBuffer.readInt64(TargetIdx)) + return sampleprof_error::truncated; + StringRef TargetName(Names[TargetIdx]); + + uint64_t TargetCount; + if (!GcovBuffer.readInt64(TargetCount)) + return sampleprof_error::truncated; + + if (Update) { + FunctionSamples &TargetProfile = Profiles[TargetName]; + TargetProfile.addCalledTargetSamples(LineOffset, Discriminator, + TargetName, TargetCount); + } + } + } + + // Process all the inlined callers into the current function. These + // are all the callsites that were inlined into this function. + for (uint32_t I = 0; I < NumCallsites; I++) { + // The offset is encoded as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator. + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) + return EC; + } + + return sampleprof_error::success; +} + +/// \brief Read a GCC AutoFDO profile. +/// +/// This format is generated by the Linux Perf conversion tool at +/// https://github.com/google/autofdo. +std::error_code SampleProfileReaderGCC::read() { + // Read the string table. + if (std::error_code EC = readNameTable()) + return EC; + + // Read the source profile. + if (std::error_code EC = readFunctionProfiles()) + return EC; + + return sampleprof_error::success; +} + +bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { + StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); + return Magic == "adcg*704"; +} + /// \brief Prepare a memory buffer for the contents of \p Filename. /// /// \returns an error code indicating the status of the buffer. @@ -364,7 +676,7 @@ setupMemoryBuffer(std::string Filename) { auto Buffer = std::move(BufferOrErr.get()); // Sanity check the file. - if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) + if (Buffer->getBufferSize() > std::numeric_limits<uint32_t>::max()) return sampleprof_error::too_large; return std::move(Buffer); @@ -384,13 +696,29 @@ SampleProfileReader::create(StringRef Filename, LLVMContext &C) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; + return create(BufferOrError.get(), C); +} - auto Buffer = std::move(BufferOrError.get()); +/// \brief Create a sample profile reader based on the format of the input data. +/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. +ErrorOr<std::unique_ptr<SampleProfileReader>> +SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) { std::unique_ptr<SampleProfileReader> Reader; - if (SampleProfileReaderBinary::hasFormat(*Buffer)) - Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C)); + if (SampleProfileReaderBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderBinary(std::move(B), C)); + else if (SampleProfileReaderGCC::hasFormat(*B)) + Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); + else if (SampleProfileReaderText::hasFormat(*B)) + Reader.reset(new SampleProfileReaderText(std::move(B), C)); else - Reader.reset(new SampleProfileReaderText(std::move(Buffer), C)); + return sampleprof_error::unrecognized_format; if (std::error_code EC = Reader->readHeader()) return EC; diff --git a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp index c95267ad976b..51feee5ad7d1 100644 --- a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -30,16 +30,27 @@ using namespace llvm::sampleprof; using namespace llvm; /// \brief Write samples to a text file. -bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { - if (S.empty()) - return true; - - OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples() - << "\n"; - - for (const auto &I : S.getBodySamples()) { - LineLocation Loc = I.first; - const SampleRecord &Sample = I.second; +/// +/// Note: it may be tempting to implement this in terms of +/// FunctionSamples::print(). Please don't. The dump functionality is intended +/// for debugging and has no specified form. +/// +/// The format used here is more structured and deliberate because +/// it needs to be parsed by the SampleProfileReaderText class. +std::error_code SampleProfileWriterText::write(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; + + OS << FName << ":" << S.getTotalSamples(); + if (Indent == 0) + OS << ":" << S.getHeadSamples(); + OS << "\n"; + + SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples()); + for (const auto &I : SortedSamples.get()) { + LineLocation Loc = I->first; + const SampleRecord &Sample = I->second; + OS.indent(Indent + 1); if (Loc.Discriminator == 0) OS << Loc.LineOffset << ": "; else @@ -52,32 +63,89 @@ bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { OS << "\n"; } - return true; + SampleSorter<CallsiteLocation, FunctionSamples> SortedCallsiteSamples( + S.getCallsiteSamples()); + Indent += 1; + for (const auto &I : SortedCallsiteSamples.get()) { + CallsiteLocation Loc = I->first; + const FunctionSamples &CalleeSamples = I->second; + OS.indent(Indent); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; + if (std::error_code EC = write(Loc.CalleeName, CalleeSamples)) + return EC; + } + Indent -= 1; + + return sampleprof_error::success; } -SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F, - std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_None) { - if (EC) - return; +std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { + const auto &ret = NameTable.find(FName); + if (ret == NameTable.end()) + return sampleprof_error::truncated_name_table; + encodeULEB128(ret->second, *OutputStream); + return sampleprof_error::success; +} - // Write the file header. +void SampleProfileWriterBinary::addName(StringRef FName) { + auto NextIdx = NameTable.size(); + NameTable.insert(std::make_pair(FName, NextIdx)); +} + +void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { + // Add all the names in indirect call targets. + for (const auto &I : S.getBodySamples()) { + const SampleRecord &Sample = I.second; + for (const auto &J : Sample.getCallTargets()) + addName(J.first()); + } + + // Recursively add all the names for inlined callsites. + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + addName(Loc.CalleeName); + addNames(CalleeSamples); + } +} + +std::error_code SampleProfileWriterBinary::writeHeader( + const StringMap<FunctionSamples> &ProfileMap) { + auto &OS = *OutputStream; + + // Write file magic identifier. encodeULEB128(SPMagic(), OS); encodeULEB128(SPVersion(), OS); + + // Generate the name table for all the functions referenced in the profile. + for (const auto &I : ProfileMap) { + addName(I.first()); + addNames(I.second); + } + + // Write out the name table. + encodeULEB128(NameTable.size(), OS); + for (auto N : NameTable) { + OS << N.first; + encodeULEB128(0, OS); + } + + return sampleprof_error::success; } -/// \brief Write samples to a binary file. -/// -/// \returns true if the samples were written successfully, false otherwise. -bool SampleProfileWriterBinary::write(StringRef FName, - const FunctionSamples &S) { - if (S.empty()) - return true; +std::error_code SampleProfileWriterBinary::writeBody(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; + + if (std::error_code EC = writeNameIdx(FName)) + return EC; - OS << FName; - encodeULEB128(0, OS); encodeULEB128(S.getTotalSamples(), OS); - encodeULEB128(S.getHeadSamples(), OS); + + // Emit all the body samples. encodeULEB128(S.getBodySamples().size(), OS); for (const auto &I : S.getBodySamples()) { LineLocation Loc = I.first; @@ -87,18 +155,38 @@ bool SampleProfileWriterBinary::write(StringRef FName, encodeULEB128(Sample.getSamples(), OS); encodeULEB128(Sample.getCallTargets().size(), OS); for (const auto &J : Sample.getCallTargets()) { - std::string Callee = J.first(); - unsigned CalleeSamples = J.second; - OS << Callee; - encodeULEB128(0, OS); + StringRef Callee = J.first(); + uint64_t CalleeSamples = J.second; + if (std::error_code EC = writeNameIdx(Callee)) + return EC; encodeULEB128(CalleeSamples, OS); } } - return true; + // Recursively emit all the callsite samples. + encodeULEB128(S.getCallsiteSamples().size(), OS); + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeBody(Loc.CalleeName, CalleeSamples)) + return EC; + } + + return sampleprof_error::success; } -/// \brief Create a sample profile writer based on the specified format. +/// \brief Write samples of a top-level function to a binary file. +/// +/// \returns true if the samples were written successfully, false otherwise. +std::error_code SampleProfileWriterBinary::write(StringRef FName, + const FunctionSamples &S) { + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(FName, S); +} + +/// \brief Create a sample profile file writer based on the specified format. /// /// \param Filename The file to create. /// @@ -110,12 +198,38 @@ bool SampleProfileWriterBinary::write(StringRef FName, ErrorOr<std::unique_ptr<SampleProfileWriter>> SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { std::error_code EC; + std::unique_ptr<raw_ostream> OS; + if (Format == SPF_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + else + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + if (EC) + return EC; + + return create(OS, Format); +} + +/// \brief Create a sample profile stream writer based on the specified format. +/// +/// \param OS The output stream to store the profile data to. +/// +/// \param Writer The writer to instantiate according to the specified format. +/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr<std::unique_ptr<SampleProfileWriter>> +SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, + SampleProfileFormat Format) { + std::error_code EC; std::unique_ptr<SampleProfileWriter> Writer; if (Format == SPF_Binary) - Writer.reset(new SampleProfileWriterBinary(Filename, EC)); + Writer.reset(new SampleProfileWriterBinary(OS)); else if (Format == SPF_Text) - Writer.reset(new SampleProfileWriterText(Filename, EC)); + Writer.reset(new SampleProfileWriterText(OS)); + else if (Format == SPF_GCC) + EC = sampleprof_error::unsupported_writing_format; else EC = sampleprof_error::unrecognized_format; |