diff options
Diffstat (limited to 'contrib/llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp')
-rw-r--r-- | contrib/llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp | 198 |
1 files changed, 111 insertions, 87 deletions
diff --git a/contrib/llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/contrib/llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
index 0859c5a20b7e..02f1d2f24d8c 100644
--- a/contrib/llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
+++ b/contrib/llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
@@ -44,11 +44,65 @@ void IntelPTError::log(llvm::raw_ostream &OS) const {
   OS << formatv(": {0:x+16}", m_address);
 }
 
-int64_t DecodedThread::GetItemsCount() const {
-  return static_cast<int64_t>(m_item_kinds.size());
+bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
+  return item_index >= first_item_index &&
+         item_index < first_item_index + items_count;
+}
+
+bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
+  return item_index >= first_item_index &&
+         item_index < first_item_index + items_count;
+}
+
+double DecodedThread::NanosecondsRange::GetInterpolatedTime(
+    uint64_t item_index, uint64_t begin_of_time_nanos,
+    const LinuxPerfZeroTscConversion &tsc_conversion) const {
+  uint64_t items_since_last_tsc = item_index - first_item_index;
+
+  auto interpolate = [&](uint64_t next_range_start_ns) {
+    if (next_range_start_ns == nanos) {
+      // If the resolution of the conversion formula is bad enough to consider
+      // these two timestamps as equal, then we just increase the next one by 1
+      // for correction
+      next_range_start_ns++;
+    }
+    long double item_duration =
+        static_cast<long double>(next_range_start_ns - nanos) / items_count;
+    return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
+  };
+
+  if (!next_range) {
+    // This is the last TSC range, so we have to extrapolate. In this case,
+    // we assume that each instruction took one TSC, which is what an
+    // instruction would take if no parallelism is achieved and the frequency
+    // multiplier is 1.
+    return interpolate(tsc_conversion.ToNanos(tsc + items_count));
+  }
+  if (items_count < (next_range->tsc - tsc)) {
+    // If the number of items in this range is less than the total TSC duration
+    // of this range, i.e. each instruction taking longer than 1 TSC, then we
+    // can assume that something else happened between these TSCs (e.g. a
+    // context switch, change to kernel, decoding errors, etc). In this case, we
+    // also assume that each instruction took 1 TSC. A proper way to improve
+    // this would be to analyze the next events in the trace looking for context
+    // switches or trace disablement events, but for now, as we only want an
+    // approximation, we keep it simple. We are also guaranteed that the time in
+    // nanos of the next range is different to the current one, just because of
+    // the definition of a NanosecondsRange.
+    return interpolate(
+        std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
+  }
+
+  // In this case, each item took less than 1 TSC, so some parallelism was
+  // achieved, which is an indication that we didn't suffer from any kind of
+  // interruption.
+  return interpolate(next_range->nanos);
 }
 
-lldb::addr_t DecodedThread::GetInstructionLoadAddress(size_t item_index) const {
+uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); }
+
+lldb::addr_t
+DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
   return m_item_data[item_index].load_address;
 }
 
@@ -58,33 +112,69 @@ DecodedThread::TraceItemStorage &
 DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) {
   m_item_kinds.push_back(kind);
   m_item_data.emplace_back();
+  if (m_last_tsc)
+    (*m_last_tsc)->second.items_count++;
+  if (m_last_nanoseconds)
+    (*m_last_nanoseconds)->second.items_count++;
   return m_item_data.back();
 }
 
-void DecodedThread::NotifyTsc(uint64_t tsc) {
-  if (!m_last_tsc || *m_last_tsc != tsc) {
-    m_timestamps.emplace(m_item_kinds.size(), tsc);
-    m_last_tsc = tsc;
+void DecodedThread::NotifyTsc(TSC tsc) {
+  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
+    return;
+
+  m_last_tsc =
+      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;
+
+  if (m_tsc_conversion) {
+    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
+    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
+      m_last_nanoseconds =
+          m_nanoseconds
+              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
+                                                         GetItemsCount()})
+              .first;
+      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
+        auto prev_range = prev(*m_last_nanoseconds);
+        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
+      }
+    }
   }
+  AppendEvent(lldb::eTraceEventHWClockTick);
 }
 
 void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
   if (!m_last_cpu || *m_last_cpu != cpu_id) {
-    m_cpus.emplace(m_item_kinds.size(), cpu_id);
+    m_cpus.emplace(GetItemsCount(), cpu_id);
     m_last_cpu = cpu_id;
     AppendEvent(lldb::eTraceEventCPUChanged);
   }
 }
 
 Optional<lldb::cpu_id_t>
-DecodedThread::GetCPUByIndex(uint64_t insn_index) const {
-  // Could possibly optimize the search
-  auto it = m_cpus.upper_bound(insn_index);
+DecodedThread::GetCPUByIndex(uint64_t item_index) const {
+  auto it = m_cpus.upper_bound(item_index);
   if (it == m_cpus.begin())
     return None;
   return prev(it)->second;
 }
 
+Optional<DecodedThread::TSCRange>
+DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
+  auto next_it = m_tscs.upper_bound(item_index);
+  if (next_it == m_tscs.begin())
+    return None;
+  return prev(next_it)->second;
+}
+
+Optional<DecodedThread::NanosecondsRange>
+DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
+  auto next_it = m_nanoseconds.upper_bound(item_index);
+  if (next_it == m_nanoseconds.begin())
+    return None;
+  return prev(next_it)->second;
+}
+
 void DecodedThread::AppendEvent(lldb::TraceEvent event) {
   CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event;
   m_events_stats.RecordEvent(event);
@@ -134,90 +224,24 @@ void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
   total_count++;
 }
 
-Optional<DecodedThread::TscRange> DecodedThread::CalculateTscRange(
-    size_t insn_index,
-    const Optional<DecodedThread::TscRange> &hint_range) const {
-  // We first try to check the given hint range in case we are traversing the
-  // trace in short jumps. If that fails, then we do the more expensive
-  // arbitrary lookup.
-  if (hint_range) {
-    Optional<TscRange> candidate_range;
-    if (insn_index < hint_range->GetStartInstructionIndex())
-      candidate_range = hint_range->Prev();
-    else if (insn_index > hint_range->GetEndInstructionIndex())
-      candidate_range = hint_range->Next();
-    else
-      candidate_range = hint_range;
-
-    if (candidate_range && candidate_range->InRange(insn_index))
-      return candidate_range;
-  }
-  // Now we do a more expensive lookup
-  auto it = m_timestamps.upper_bound(insn_index);
-  if (it == m_timestamps.begin())
-    return None;
-
-  return TscRange(--it, *this);
-}
-
-lldb::TraceItemKind DecodedThread::GetItemKindByIndex(size_t item_index) const {
+lldb::TraceItemKind
+DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
   return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]);
 }
 
-const char *DecodedThread::GetErrorByIndex(size_t item_index) const {
+const char *DecodedThread::GetErrorByIndex(uint64_t item_index) const {
   return m_item_data[item_index].error;
 }
 
-DecodedThread::DecodedThread(ThreadSP thread_sp) : m_thread_sp(thread_sp) {}
-
-lldb::TraceCursorUP DecodedThread::CreateNewCursor() {
-  return std::make_unique<TraceCursorIntelPT>(m_thread_sp, shared_from_this());
-}
+DecodedThread::DecodedThread(
+    ThreadSP thread_sp,
+    const llvm::Optional<LinuxPerfZeroTscConversion> &tsc_conversion)
+    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}
 
 size_t DecodedThread::CalculateApproximateMemoryUsage() const {
   return sizeof(TraceItemStorage) * m_item_data.size() +
          sizeof(uint8_t) * m_item_kinds.size() +
-         (sizeof(size_t) + sizeof(uint64_t)) * m_timestamps.size() +
-         (sizeof(size_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
-}
-
-DecodedThread::TscRange::TscRange(std::map<size_t, uint64_t>::const_iterator it,
-                                  const DecodedThread &decoded_thread)
-    : m_it(it), m_decoded_thread(&decoded_thread) {
-  auto next_it = m_it;
-  ++next_it;
-  m_end_index = (next_it == m_decoded_thread->m_timestamps.end())
-                    ? std::numeric_limits<uint64_t>::max()
-                    : next_it->first - 1;
-}
-
-size_t DecodedThread::TscRange::GetTsc() const { return m_it->second; }
-
-size_t DecodedThread::TscRange::GetStartInstructionIndex() const {
-  return m_it->first;
-}
-
-size_t DecodedThread::TscRange::GetEndInstructionIndex() const {
-  return m_end_index;
-}
-
-bool DecodedThread::TscRange::InRange(size_t insn_index) const {
-  return GetStartInstructionIndex() <= insn_index &&
-         insn_index <= GetEndInstructionIndex();
-}
-
-Optional<DecodedThread::TscRange> DecodedThread::TscRange::Next() const {
-  auto next_it = m_it;
-  ++next_it;
-  if (next_it == m_decoded_thread->m_timestamps.end())
-    return None;
-  return TscRange(next_it, *m_decoded_thread);
-}
-
-Optional<DecodedThread::TscRange> DecodedThread::TscRange::Prev() const {
-  if (m_it == m_decoded_thread->m_timestamps.begin())
-    return None;
-  auto prev_it = m_it;
-  --prev_it;
-  return TscRange(prev_it, *m_decoded_thread);
+         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
+         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
+         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
 }