Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h')
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h  599
1 files changed, 599 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h
new file mode 100644
index 000000000000..7536f39b8081
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h
@@ -0,0 +1,599 @@
+//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the interface for a function call trie.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FUNCTION_CALL_TRIE_H
+#define XRAY_FUNCTION_CALL_TRIE_H
+
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_profiling_flags.h"
+#include "xray_segmented_array.h"
+#include <cstddef> // For std::byte.
+#include <limits>
+#include <memory> // For placement new.
+#include <utility>
+
+namespace __xray {
+
+/// A FunctionCallTrie represents the stack traces of XRay instrumented
+/// functions that we've encountered, where each node corresponds to a function
+/// and the path from the root to that node represents its stack trace. Each
+/// node in the trie contains some useful values, including:
+///
+/// * The cumulative amount of time spent in this particular node/stack.
+/// * The number of times this stack has appeared.
+/// * A histogram of latencies for that particular node.
+///
+/// Each node in the trie will also contain a list of callees, represented using
+/// an Array<NodeIdPair> -- each NodeIdPair instance will contain the function
+/// ID of the callee, and a pointer to the node.
+///
+/// If we visualise this data structure, we'll find the following potential
+/// representation:
+///
+/// [function id node] -> [callees] [cumulative time]
+/// [call counter] [latency histogram]
+///
+/// As an example, when we have a function in this pseudocode:
+///
+/// func f(N) {
+/// g()
+/// h()
+/// for i := 1..N { j() }
+/// }
+///
+/// We may end up with a trie of the following form:
+///
+/// f -> [ g, h, j ] [...] [1] [...]
+/// g -> [ ... ] [...] [1] [...]
+/// h -> [ ... ] [...] [1] [...]
+/// j -> [ ... ] [...] [N] [...]
+///
+/// If for instance the function g() called j() like so:
+///
+/// func g() {
+/// for i := 1..10 { j() }
+/// }
+///
+/// We'll find the following updated trie:
+///
+/// f -> [ g, h, j ] [...] [1] [...]
+/// g -> [ j' ] [...] [1] [...]
+/// h -> [ ... ] [...] [1] [...]
+/// j -> [ ... ] [...] [N] [...]
+/// j' -> [ ... ] [...] [10] [...]
+///
+/// Note that we'll have a new node representing the path `f -> g -> j'` with
+/// isolated data. This isolation gives us a means of representing the stack
+/// traces as a path, as opposed to a key in a table. The alternative
+/// implementation here would be to use a separate table for the path, and use
+/// hashes of the path as an identifier to accumulate the information. We've
+/// moved away from that approach because computing the hash every time we need
+/// to update a function's call information, while handling the entry and exit
+/// events, proved too expensive.
+///
+/// This approach allows us to maintain a shadow stack, which represents the
+/// currently executing path, and on function exits quickly compute the amount
+/// of time elapsed from the entry, then update the counters for the node
+/// already represented in the trie. This necessitates an efficient
+/// representation of the various data structures (the list of callees must be
+/// cache-aware and efficient to look up, and the histogram must be compact and
+/// quick to update) to enable us to keep the overheads of this implementation
+/// to a minimum.
+class FunctionCallTrie {
+public:
+ struct Node;
+
+ // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
+ // standard library types in this header.
+ struct NodeIdPair {
+ Node *NodePtr;
+ int32_t FId;
+ };
+
+ using NodeIdPairArray = Array<NodeIdPair>;
+ using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
+
+ // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
+ // number of times this node actually appeared, the cumulative amount of time
+ // for this particular node including its children's call times, and just the
+ // local time spent on this node. Each Node will have the ID of the XRay
+ // instrumented function that it is associated with.
+ struct Node {
+ Node *Parent;
+ NodeIdPairArray Callees;
+ uint64_t CallCount;
+ uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
+ int32_t FId;
+
+ // TODO: Include the compact histogram.
+ };
+
+private:
+ struct ShadowStackEntry {
+ uint64_t EntryTSC;
+ Node *NodePtr;
+ uint16_t EntryCPU;
+ };
+
+ using NodeArray = Array<Node>;
+ using RootArray = Array<Node *>;
+ using ShadowStackArray = Array<ShadowStackEntry>;
+
+public:
+ // We collate the allocators we need into a single struct, as a convenience to
+ // allow us to initialize these as a group.
+ struct Allocators {
+ using NodeAllocatorType = NodeArray::AllocatorType;
+ using RootAllocatorType = RootArray::AllocatorType;
+ using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
+
+ // Use hosted aligned storage members to allow for trivial move and init.
+ // This also allows us to sidestep the potentially-failing allocation issue.
+ alignas(NodeAllocatorType) std::byte
+ NodeAllocatorStorage[sizeof(NodeAllocatorType)];
+ alignas(RootAllocatorType) std::byte
+ RootAllocatorStorage[sizeof(RootAllocatorType)];
+ alignas(ShadowStackAllocatorType) std::byte
+ ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
+ alignas(NodeIdPairAllocatorType) std::byte
+ NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];
+
+ NodeAllocatorType *NodeAllocator = nullptr;
+ RootAllocatorType *RootAllocator = nullptr;
+ ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
+ NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+
+ Allocators() = default;
+ Allocators(const Allocators &) = delete;
+ Allocators &operator=(const Allocators &) = delete;
+
+ struct Buffers {
+ BufferQueue::Buffer NodeBuffer;
+ BufferQueue::Buffer RootsBuffer;
+ BufferQueue::Buffer ShadowStackBuffer;
+ BufferQueue::Buffer NodeIdPairBuffer;
+ };
+
+ explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
+ new (&NodeAllocatorStorage)
+ NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+
+ new (&RootAllocatorStorage)
+ RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+
+ new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
+ B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+
+ new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
+ B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ }
+
+ explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
+ new (&NodeAllocatorStorage) NodeAllocatorType(Max);
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+
+ new (&RootAllocatorStorage) RootAllocatorType(Max);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+
+ new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+
+ new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ }
+
+ Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
+ // Here we rely on the safety of memcpy'ing the contents of the storage
+ // members, and then resetting the source's pointers to nullptr.
+ internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
+ sizeof(NodeAllocatorType));
+ internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
+ sizeof(RootAllocatorType));
+ internal_memcpy(&ShadowStackAllocatorStorage,
+ &O.ShadowStackAllocatorStorage,
+ sizeof(ShadowStackAllocatorType));
+ internal_memcpy(&NodeIdPairAllocatorStorage,
+ &O.NodeIdPairAllocatorStorage,
+ sizeof(NodeIdPairAllocatorType));
+
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+
+ O.NodeAllocator = nullptr;
+ O.RootAllocator = nullptr;
+ O.ShadowStackAllocator = nullptr;
+ O.NodeIdPairAllocator = nullptr;
+ }
+
+ Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
+ // When moving into an existing instance, we ensure that we clean up the
+ // current allocators.
+ if (NodeAllocator)
+ NodeAllocator->~NodeAllocatorType();
+ if (O.NodeAllocator) {
+ new (&NodeAllocatorStorage)
+ NodeAllocatorType(std::move(*O.NodeAllocator));
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+ O.NodeAllocator = nullptr;
+ } else {
+ NodeAllocator = nullptr;
+ }
+
+ if (RootAllocator)
+ RootAllocator->~RootAllocatorType();
+ if (O.RootAllocator) {
+ new (&RootAllocatorStorage)
+ RootAllocatorType(std::move(*O.RootAllocator));
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+ O.RootAllocator = nullptr;
+ } else {
+ RootAllocator = nullptr;
+ }
+
+ if (ShadowStackAllocator)
+ ShadowStackAllocator->~ShadowStackAllocatorType();
+ if (O.ShadowStackAllocator) {
+ new (&ShadowStackAllocatorStorage)
+ ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+ O.ShadowStackAllocator = nullptr;
+ } else {
+ ShadowStackAllocator = nullptr;
+ }
+
+ if (NodeIdPairAllocator)
+ NodeIdPairAllocator->~NodeIdPairAllocatorType();
+ if (O.NodeIdPairAllocator) {
+ new (&NodeIdPairAllocatorStorage)
+ NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ O.NodeIdPairAllocator = nullptr;
+ } else {
+ NodeIdPairAllocator = nullptr;
+ }
+
+ return *this;
+ }
+
+ ~Allocators() XRAY_NEVER_INSTRUMENT {
+ if (NodeAllocator != nullptr)
+ NodeAllocator->~NodeAllocatorType();
+ if (RootAllocator != nullptr)
+ RootAllocator->~RootAllocatorType();
+ if (ShadowStackAllocator != nullptr)
+ ShadowStackAllocator->~ShadowStackAllocatorType();
+ if (NodeIdPairAllocator != nullptr)
+ NodeIdPairAllocator->~NodeIdPairAllocatorType();
+ }
+ };
+
+ static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
+ return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
+ }
+
+ static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
+ Allocators A(Max);
+ return A;
+ }
+
+ static Allocators
+ InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
+ Allocators A(Bufs);
+ return A;
+ }
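The Allocators(Buffers &) constructor earlier in this struct consumes only the Data and Size fields of each BufferQueue::Buffer. The following is a hedged sketch, not part of the original header, of how a caller might wire pre-allocated memory through Allocators::Buffers; the storage sizes and the helper name are invented, the sketch assumes it sits inside the __xray namespace with this header available, and a real caller would normally obtain its buffers from a BufferQueue instead.

// Backing storage for the sketch; sizes are arbitrary.
static char NodeMem[1 << 16];
static char RootMem[1 << 12];
static char ShadowMem[1 << 14];
static char PairMem[1 << 16];

void exampleWithPreallocatedBuffers() {
  // Only Data and Size are read by the Allocators(Buffers &) constructor.
  FunctionCallTrie::Allocators::Buffers B;
  B.NodeBuffer.Data = NodeMem;
  B.NodeBuffer.Size = sizeof(NodeMem);
  B.RootsBuffer.Data = RootMem;
  B.RootsBuffer.Size = sizeof(RootMem);
  B.ShadowStackBuffer.Data = ShadowMem;
  B.ShadowStackBuffer.Size = sizeof(ShadowMem);
  B.NodeIdPairBuffer.Data = PairMem;
  B.NodeIdPairBuffer.Size = sizeof(PairMem);

  // The Allocators object must outlive the FunctionCallTrie that uses it.
  auto A = FunctionCallTrie::InitAllocatorsFromBuffers(B);
  FunctionCallTrie Trie(A);
  Trie.enterFunction(/*FId=*/1, /*TSC=*/1000, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/2000, /*CPU=*/0);
}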
+
+private:
+ NodeArray Nodes;
+ RootArray Roots;
+ ShadowStackArray ShadowStack;
+ NodeIdPairAllocatorType *NodeIdPairAllocator;
+ uint32_t OverflowedFunctions;
+
+public:
+ explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
+ : Nodes(*A.NodeAllocator),
+ Roots(*A.RootAllocator),
+ ShadowStack(*A.ShadowStackAllocator),
+ NodeIdPairAllocator(A.NodeIdPairAllocator),
+ OverflowedFunctions(0) {}
+
+ FunctionCallTrie() = delete;
+ FunctionCallTrie(const FunctionCallTrie &) = delete;
+ FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
+
+ FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
+ : Nodes(std::move(O.Nodes)),
+ Roots(std::move(O.Roots)),
+ ShadowStack(std::move(O.ShadowStack)),
+ NodeIdPairAllocator(O.NodeIdPairAllocator),
+ OverflowedFunctions(O.OverflowedFunctions) {}
+
+ FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
+ Nodes = std::move(O.Nodes);
+ Roots = std::move(O.Roots);
+ ShadowStack = std::move(O.ShadowStack);
+ NodeIdPairAllocator = O.NodeIdPairAllocator;
+ OverflowedFunctions = O.OverflowedFunctions;
+ return *this;
+ }
+
+ ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
+
+ void enterFunction(const int32_t FId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(FId, 0);
+
+ // If we've already overflowed the function call stack, do not bother
+ // attempting to record any more function entries.
+ if (UNLIKELY(OverflowedFunctions)) {
+ ++OverflowedFunctions;
+ return;
+ }
+
+ // If this is the first function we've encountered, we want to set up the
+ // node(s) and treat it as a root.
+ if (UNLIKELY(ShadowStack.empty())) {
+ auto *NewRoot = Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
+ if (UNLIKELY(NewRoot == nullptr))
+ return;
+ if (Roots.AppendEmplace(NewRoot) == nullptr) {
+ Nodes.trim(1);
+ return;
+ }
+ if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
+ Nodes.trim(1);
+ Roots.trim(1);
+ ++OverflowedFunctions;
+ return;
+ }
+ return;
+ }
+
+ // From this point on, we require that the stack is not empty.
+ DCHECK(!ShadowStack.empty());
+ auto TopNode = ShadowStack.back().NodePtr;
+ DCHECK_NE(TopNode, nullptr);
+
+ // If we've seen this callee before, we access that node and place it on
+ // top of the stack.
+ auto *Callee = TopNode->Callees.find_element(
+ [FId](const NodeIdPair &NR) { return NR.FId == FId; });
+ if (Callee != nullptr) {
+ CHECK_NE(Callee->NodePtr, nullptr);
+ if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
+ ++OverflowedFunctions;
+ return;
+ }
+
+ // This means we've never seen this stack before; create a new node here.
+ auto *NewNode = Nodes.AppendEmplace(
+ TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
+ if (UNLIKELY(NewNode == nullptr))
+ return;
+ DCHECK_NE(NewNode, nullptr);
+ TopNode->Callees.AppendEmplace(NewNode, FId);
+ if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
+ ++OverflowedFunctions;
+ return;
+ }
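Because callee lookup happens per parent node, direct recursion gets its own child node for each distinct path rather than bumping an existing node in place, mirroring the j' example in the class comment. The following is a hedged sketch of that behaviour, not code from the original header: the function ID 7, the TSC/CPU values, and the helper name are invented, the sketch assumes it sits inside the __xray namespace with this header available, and InitAllocators() is assumed to have initialized profiling flags to read its defaults from.

void recursionCreatesDistinctPaths() {
  auto A = FunctionCallTrie::InitAllocators(); // reads per_thread_allocator_max
  FunctionCallTrie T(A);
  T.enterFunction(/*FId=*/7, /*TSC=*/100, /*CPU=*/0); // outer call becomes a root
  T.enterFunction(/*FId=*/7, /*TSC=*/110, /*CPU=*/0); // recursive call: a new child node, also FId 7
  T.exitFunction(/*FId=*/7, /*TSC=*/120, /*CPU=*/0);  // pops only the innermost FId-7 frame
  T.exitFunction(/*FId=*/7, /*TSC=*/130, /*CPU=*/0);  // pops the root frame
  // T.getRoots() now holds a single root with FId 7 whose Callees array
  // contains another FId-7 node, i.e. the path 7 -> 7 has its own counters.
}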
+
+ void exitFunction(int32_t FId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ // If we're exiting functions that have "overflowed", i.e. did not fit into
+ // the stack due to allocator constraints, we decrement that count first.
+ if (OverflowedFunctions) {
+ --OverflowedFunctions;
+ return;
+ }
+
+ // When we exit a function, we walk up the ShadowStack until we find the
+ // entry for this function, accounting time for every frame we pop along the
+ // way. We do as little processing here as we can, since most of the hard
+ // work would have already been done at function entry.
+ uint64_t CumulativeTreeTime = 0;
+
+ while (!ShadowStack.empty()) {
+ const auto &Top = ShadowStack.back();
+ auto TopNode = Top.NodePtr;
+ DCHECK_NE(TopNode, nullptr);
+
+ // We may encounter overflow on the TSC we're provided, which may end up
+ // being less than the TSC when we first entered the function.
+ //
+ // To get an accurate measurement of cycles, we need to check whether we've
+ // overflowed (TSC < Top.EntryTSC) and, if so, account for the difference
+ // between the entry TSC and the maximum value of the TSC counter (the max of
+ // uint64_t), then add the current TSC. The delta we compute can never exceed
+ // the 64-bit unsigned maximum; for example, an entry TSC of 1 and a current
+ // TSC of 0 yields numeric_limits<uint64_t>::max() - 1.
+ //
+ // NOTE: This assumes that TSCs are synchronised across CPUs.
+ // TODO: Count the number of times we've seen CPU migrations.
+ uint64_t LocalTime =
+ Top.EntryTSC > TSC
+ ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
+ : TSC - Top.EntryTSC;
+ TopNode->CallCount++;
+ TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
+ CumulativeTreeTime += LocalTime;
+ ShadowStack.trim(1);
+
+ // TODO: Update the histogram for the node.
+ if (TopNode->FId == FId)
+ break;
+ }
+ }
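The wraparound branch above can be restated in isolation. The helper below is a minimal sketch of the same arithmetic, not part of the original file; the name wraparoundAwareDelta is invented. If the current reading is below the entry reading, it assumes the 64-bit counter wrapped exactly once.

#include <cstdint>
#include <limits>

// Mirrors the LocalTime computation in exitFunction above.
static uint64_t wraparoundAwareDelta(uint64_t EntryTSC, uint64_t ExitTSC) {
  return EntryTSC > ExitTSC
             ? (std::numeric_limits<uint64_t>::max() - EntryTSC) + ExitTSC
             : ExitTSC - EntryTSC;
}

// e.g. wraparoundAwareDelta(std::numeric_limits<uint64_t>::max() - 5, 10)
// returns 15, while wraparoundAwareDelta(1000, 2000) returns 1000.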
+
+ const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }
+
+ // The deepCopyInto operation will update the provided FunctionCallTrie by
+ // re-creating the contents of this particular FunctionCallTrie in the other
+ // FunctionCallTrie. It does this using a depth-first traversal from the
+ // roots, recreating each visited node in the provided FunctionCallTrie.
+ //
+ // This operation will *not* clear the state in `O`; copying into a non-empty
+ // trie would produce duplicate entries, so `O` must be empty when this
+ // function is called.
+ //
+ // This function is *not* thread-safe, and may require external
+ // synchronisation of both "this" and `O`.
+ void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
+ DCHECK(O.getRoots().empty());
+
+ // We push each root onto a stack, to use as the parent marker for new
+ // nodes we create as we traverse depth-first down the call tree.
+ struct NodeAndParent {
+ FunctionCallTrie::Node *Node;
+ FunctionCallTrie::Node *NewNode;
+ };
+ using Stack = Array<NodeAndParent>;
+
+ typename Stack::AllocatorType StackAllocator(
+ profilingFlags()->stack_allocator_max);
+ Stack DFSStack(StackAllocator);
+
+ for (const auto Root : getRoots()) {
+ // Add a node in O for this root.
+ auto NewRoot = O.Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
+ Root->CumulativeLocalTime, Root->FId);
+
+ // If we cannot allocate more memory, we bail out right away.
+ if (UNLIKELY(NewRoot == nullptr))
+ return;
+
+ if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
+ return;
+
+ // TODO: Figure out what to do if we fail to allocate any more stack
+ // space. Maybe warn or report once?
+ if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
+ return;
+ while (!DFSStack.empty()) {
+ NodeAndParent NP = DFSStack.back();
+ DCHECK_NE(NP.Node, nullptr);
+ DCHECK_NE(NP.NewNode, nullptr);
+ DFSStack.trim(1);
+ for (const auto Callee : NP.Node->Callees) {
+ auto NewNode = O.Nodes.AppendEmplace(
+ NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
+ Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
+ Callee.FId);
+ if (UNLIKELY(NewNode == nullptr))
+ return;
+ if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
+ nullptr))
+ return;
+ if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
+ nullptr))
+ return;
+ }
+ }
+ }
+ }
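A brief usage sketch for the operation above, with invented names and not taken from the original file: the destination trie gets its own Allocators (which must outlive it) and must still be empty, per the DCHECK at the top of deepCopyInto.

void snapshotIntoEmptyTrie(const FunctionCallTrie &Live) {
  // Fresh allocators keep the snapshot's storage independent of Live's.
  auto SnapshotAllocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Snapshot(SnapshotAllocators);

  // Snapshot has no roots yet, satisfying deepCopyInto's precondition.
  Live.deepCopyInto(Snapshot);

  // Snapshot now holds an isolated copy of every path along with its
  // CallCount and CumulativeLocalTime, backed by SnapshotAllocators.
}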
+
+ // The mergeInto operation will update the provided FunctionCallTrie by
+ // traversing the current trie's roots and updating (i.e. merging) the data in
+ // the nodes with the data in the target's nodes. If a node doesn't exist in
+ // the provided trie, we add a new one in the corresponding position, and
+ // carry over the data from the original (current) trie, along with all its
+ // callees.
+ //
+ // This function is *not* thread-safe, and may require external
+ // synchronisation of both "this" and `O`.
+ void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
+ struct NodeAndTarget {
+ FunctionCallTrie::Node *OrigNode;
+ FunctionCallTrie::Node *TargetNode;
+ };
+ using Stack = Array<NodeAndTarget>;
+ typename Stack::AllocatorType StackAllocator(
+ profilingFlags()->stack_allocator_max);
+ Stack DFSStack(StackAllocator);
+
+ for (const auto Root : getRoots()) {
+ Node *TargetRoot = nullptr;
+ auto R = O.Roots.find_element(
+ [&](const Node *Node) { return Node->FId == Root->FId; });
+ if (R == nullptr) {
+ TargetRoot = O.Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
+ Root->FId);
+ if (UNLIKELY(TargetRoot == nullptr))
+ return;
+
+ O.Roots.Append(TargetRoot);
+ } else {
+ TargetRoot = *R;
+ }
+
+ DFSStack.AppendEmplace(Root, TargetRoot);
+ while (!DFSStack.empty()) {
+ NodeAndTarget NT = DFSStack.back();
+ DCHECK_NE(NT.OrigNode, nullptr);
+ DCHECK_NE(NT.TargetNode, nullptr);
+ DFSStack.trim(1);
+ // TODO: Update the histogram as well when we have it ready.
+ NT.TargetNode->CallCount += NT.OrigNode->CallCount;
+ NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
+ for (const auto Callee : NT.OrigNode->Callees) {
+ auto TargetCallee = NT.TargetNode->Callees.find_element(
+ [&](const FunctionCallTrie::NodeIdPair &C) {
+ return C.FId == Callee.FId;
+ });
+ if (TargetCallee == nullptr) {
+ auto NewTargetNode = O.Nodes.AppendEmplace(
+ NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
+ Callee.FId);
+
+ if (UNLIKELY(NewTargetNode == nullptr))
+ return;
+
+ TargetCallee =
+ NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
+ // Appending the callee record can itself fail under allocator pressure;
+ // bail out rather than dereference a null TargetCallee below.
+ if (UNLIKELY(TargetCallee == nullptr))
+ return;
+ }
+ DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
+ }
+ }
+ }
+ }
+};
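Putting the pieces together, here is a hedged end-to-end sketch of how a per-thread trie might be driven and then folded into an aggregate. It is not code from the original file: the function IDs, TSC/CPU values, and the helper name are invented, and the XRay profiling flags are assumed to be initialized so that InitAllocators() and mergeInto() can read their defaults.

void perThreadProfileSketch() {
  // Each trie needs its own Allocators, which must outlive the trie.
  auto ThreadAllocators = FunctionCallTrie::InitAllocators();
  auto GlobalAllocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie ThreadTrie(ThreadAllocators);
  FunctionCallTrie GlobalTrie(GlobalAllocators);

  // A small trace: f (FId 1) calls g (FId 2); TSC values are illustrative.
  ThreadTrie.enterFunction(/*FId=*/1, /*TSC=*/1000, /*CPU=*/0); // enter f
  ThreadTrie.enterFunction(/*FId=*/2, /*TSC=*/1100, /*CPU=*/0); // enter g
  ThreadTrie.exitFunction(/*FId=*/2, /*TSC=*/1500, /*CPU=*/0);  // exit g
  ThreadTrie.exitFunction(/*FId=*/1, /*TSC=*/2000, /*CPU=*/0);  // exit f

  // One root (f); its Callees array holds the node for g.
  for (const auto Root : ThreadTrie.getRoots()) {
    (void)Root->FId;       // 1
    (void)Root->CallCount; // 1
  }

  // Fold the per-thread data into the aggregate: call counts and cumulative
  // local times for matching paths are summed.
  ThreadTrie.mergeInto(GlobalTrie);
}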
+
+} // namespace __xray
+
+#endif // XRAY_FUNCTION_CALL_TRIE_H