Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/xray')
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt  4
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp  127
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h  288
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt  6
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp  164
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp  49
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h  37
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc  23
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp  515
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h  42
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp  237
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h  280
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h  31
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h  372
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp  47
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h  37
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc  28
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h  75
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h  231
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp  757
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h  38
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp  84
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h  39
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc  49
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h  603
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp  131
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp  518
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h  104
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp  209
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp  170
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp  178
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt  6
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp  113
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc  36
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp  414
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h  73
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp  519
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp  39
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h  38
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc  31
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h  56
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h  650
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S  163
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S  105
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S  109
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S  135
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp  15
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S  235
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S  289
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h  90
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp  199
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h  85
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp  359
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc  33
54 files changed, 9265 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt b/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt
new file mode 100644
index 000000000000..963fff2d697e
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt
@@ -0,0 +1,4 @@
+___start_xray_fn_idx
+___start_xray_instr_map
+___stop_xray_fn_idx
+___stop_xray_instr_map
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp
new file mode 100644
index 000000000000..00105d30b4db
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp
@@ -0,0 +1,127 @@
+//===-- xray_AArch64.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of AArch64-specific routines (64-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+extern "C" void __clear_cache(void *start, void *end);
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+ PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
+ PO_LdrW0_12 = 0x18000060, // LDR W0, #12
+ PO_LdrX16_12 = 0x58000070, // LDR X16, #12
+ PO_BlrX16 = 0xD63F0200, // BLR X16
+ PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16
+ PO_B32 = 0x14000008 // B #32
+};
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B #32
+ // 7 NOPs (24 bytes)
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n:
+ // STP X0, X30, [SP, #-16]! ; PUSH {r0, lr}
+ // LDR W0, #12 ; W0 := function ID
+ // LDR X16,#12 ; X16 := address of the trampoline
+ // BLR X16
+ // ;DATA: 32 bits of function ID
+ // ;DATA: lower 32 bits of the address of the trampoline
+ // ;DATA: higher 32 bits of the address of the trampoline
+ // LDP X0, X30, [SP], #16 ; POP {r0, lr}
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #32
+
+ uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address());
+ uint32_t *CurAddress = FirstAddress + 1;
+ if (Enable) {
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16);
+ CurAddress++;
+ *CurAddress = FuncId;
+ CurAddress++;
+ *reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
+ CurAddress += 2;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
+ CurAddress++;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
+ }
+ __clear_cache(reinterpret_cast<char *>(FirstAddress),
+ reinterpret_cast<char *>(CurAddress));
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled)
+ XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64?
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in aarch64?
+ return false;
+}
+
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
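
The ordering constraint described in patchSled() above (write the sled body first, then publish the first instruction with a release store) can be shown as a minimal standalone sketch. Names here are hypothetical and this is not part of the runtime; the real code stores through a reinterpret_cast'ed std::atomic<uint32_t> and then calls __clear_cache():

#include <atomic>
#include <cstdint>

// Publish a patched sled: fill in every word after the first while the
// old first word still branches over them, then release-store the new
// first instruction so a concurrent thread sees either the old branch
// or the fully written body.
void publishSled(std::atomic<uint32_t> *Sled, const uint32_t *NewBody,
                 unsigned Words, uint32_t NewFirstInsn) {
  for (unsigned I = 1; I < Words; ++I)
    Sled[I].store(NewBody[I], std::memory_order_relaxed);
  Sled[0].store(NewFirstInsn, std::memory_order_release);
  // On real hardware the instruction cache must then be invalidated,
  // which is what the __clear_cache() call above is for.
}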
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h
new file mode 100644
index 000000000000..4b42c473261d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h
@@ -0,0 +1,288 @@
+//===-- xray_allocator.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the allocator interface for an arena allocator, used primarily for
+// the profiling runtime.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_ALLOCATOR_H
+#define XRAY_ALLOCATOR_H
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#if SANITIZER_FUCHSIA
+#include <zircon/process.h>
+#include <zircon/status.h>
+#include <zircon/syscalls.h>
+#else
+#include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_defs.h"
+#include "xray_utils.h"
+#include <cstddef>
+#include <cstdint>
+#include <sys/mman.h>
+
+namespace __xray {
+
+// We implement our own memory allocation routine which will bypass the
+// internal allocator. This allows us to manage the memory directly, using
+// mmap'ed memory to back the allocators.
+template <class T> T *allocate() XRAY_NEVER_INSTRUMENT {
+ uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached());
+#if SANITIZER_FUCHSIA
+ zx_handle_t Vmo;
+ zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo);
+ if (Status != ZX_OK) {
+ if (Verbosity())
+ Report("XRay Profiling: Failed to create VMO of size %zu: %s\n",
+ sizeof(T), _zx_status_get_string(Status));
+ return nullptr;
+ }
+ uintptr_t B;
+ Status =
+ _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
+ Vmo, 0, sizeof(T), &B);
+ _zx_handle_close(Vmo);
+ if (Status != ZX_OK) {
+ if (Verbosity())
+ Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", sizeof(T),
+ _zx_status_get_string(Status));
+ return nullptr;
+ }
+ return reinterpret_cast<T *>(B);
+#else
+ uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ int ErrNo = 0;
+ if (UNLIKELY(internal_iserror(B, &ErrNo))) {
+ if (Verbosity())
+      Report(
+          "XRay Profiling: Failed to allocate memory of size %zu; Error = %d.\n",
+          RoundedSize, ErrNo);
+ return nullptr;
+ }
+#endif
+ return reinterpret_cast<T *>(B);
+}
+
+template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
+ if (B == nullptr)
+ return;
+ uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached());
+#if SANITIZER_FUCHSIA
+ _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B),
+ RoundedSize);
+#else
+ internal_munmap(B, RoundedSize);
+#endif
+}
+
+template <class T = unsigned char>
+T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+ uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached());
+#if SANITIZER_FUCHSIA
+ zx_handle_t Vmo;
+ zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo);
+ if (Status != ZX_OK) {
+ if (Verbosity())
+ Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", S,
+ _zx_status_get_string(Status));
+ return nullptr;
+ }
+ uintptr_t B;
+ Status = _zx_vmar_map(_zx_vmar_root_self(),
+ ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, Vmo, 0, S, &B);
+ _zx_handle_close(Vmo);
+ if (Status != ZX_OK) {
+ if (Verbosity())
+ Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", S,
+ _zx_status_get_string(Status));
+ return nullptr;
+ }
+#else
+ uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ int ErrNo = 0;
+ if (UNLIKELY(internal_iserror(B, &ErrNo))) {
+ if (Verbosity())
+      Report(
+          "XRay Profiling: Failed to allocate memory of size %zu; Error = %d.\n",
+          RoundedSize, ErrNo);
+ return nullptr;
+ }
+#endif
+ return reinterpret_cast<T *>(B);
+}
+
+template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT {
+ if (B == nullptr)
+ return;
+ uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached());
+#if SANITIZER_FUCHSIA
+ _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B),
+ RoundedSize);
+#else
+ internal_munmap(B, RoundedSize);
+#endif
+}
+
+template <class T, class... U>
+T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT {
+ auto A = allocateBuffer<T>(N);
+ if (A != nullptr)
+ while (N > 0)
+ new (A + (--N)) T(std::forward<U>(Us)...);
+ return A;
+}
+
+/// The Allocator type hands out fixed-sized chunks of memory that are
+/// cache-line aligned and sized. This is useful for placement of
+/// performance-sensitive data in memory that's frequently accessed. The
+/// allocator also self-limits the peak memory usage to a dynamically defined
+/// maximum.
+///
+/// N is the lower-bound size of the block of memory to return from the
+/// allocation function. Each block is sized as a multiple of the cache-line
+/// size: we compute how many cache lines' worth of memory are required to
+/// subsume N, and hand out blocks of that size.
+///
+/// The Allocator instance will manage its own memory acquired through mmap.
+/// This severely constrains the platforms on which this can be used to POSIX
+/// systems where mmap semantics are well-defined.
+///
+/// FIXME: Isolate the lower-level memory management to a different abstraction
+/// that can be platform-specific.
+template <size_t N> struct Allocator {
+ // The Allocator returns memory as Block instances.
+ struct Block {
+ /// Compute the minimum cache-line size multiple that is >= N.
+ static constexpr auto Size = nearest_boundary(N, kCacheLineSize);
+ void *Data;
+ };
+
+private:
+ size_t MaxMemory{0};
+ unsigned char *BackingStore = nullptr;
+ unsigned char *AlignedNextBlock = nullptr;
+ size_t AllocatedBlocks = 0;
+ bool Owned;
+ SpinMutex Mutex{};
+
+ void *Alloc() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Lock(&Mutex);
+ if (UNLIKELY(BackingStore == nullptr)) {
+ BackingStore = allocateBuffer(MaxMemory);
+ if (BackingStore == nullptr) {
+ if (Verbosity())
+ Report("XRay Profiling: Failed to allocate memory for allocator.\n");
+ return nullptr;
+ }
+
+ AlignedNextBlock = BackingStore;
+
+ // Ensure that NextBlock is aligned appropriately.
+ auto BackingStoreNum = reinterpret_cast<uintptr_t>(BackingStore);
+ auto AlignedNextBlockNum = nearest_boundary(
+ reinterpret_cast<uintptr_t>(AlignedNextBlock), kCacheLineSize);
+ if (diff(AlignedNextBlockNum, BackingStoreNum) > ptrdiff_t(MaxMemory)) {
+ deallocateBuffer(BackingStore, MaxMemory);
+ AlignedNextBlock = BackingStore = nullptr;
+ if (Verbosity())
+ Report("XRay Profiling: Cannot obtain enough memory from "
+ "preallocated region.\n");
+ return nullptr;
+ }
+
+ AlignedNextBlock = reinterpret_cast<unsigned char *>(AlignedNextBlockNum);
+
+ // Assert that AlignedNextBlock is cache-line aligned.
+ DCHECK_EQ(reinterpret_cast<uintptr_t>(AlignedNextBlock) % kCacheLineSize,
+ 0);
+ }
+
+ if (((AllocatedBlocks + 1) * Block::Size) > MaxMemory)
+ return nullptr;
+
+ // Align the pointer we'd like to return to an appropriate alignment, then
+ // advance the pointer from where to start allocations.
+ void *Result = AlignedNextBlock;
+ AlignedNextBlock =
+ reinterpret_cast<unsigned char *>(AlignedNextBlock) + Block::Size;
+ ++AllocatedBlocks;
+ return Result;
+ }
+
+public:
+ explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT
+ : MaxMemory(RoundUpTo(M, kCacheLineSize)),
+ BackingStore(nullptr),
+ AlignedNextBlock(nullptr),
+ AllocatedBlocks(0),
+ Owned(true),
+ Mutex() {}
+
+ explicit Allocator(void *P, size_t M) XRAY_NEVER_INSTRUMENT
+ : MaxMemory(M),
+ BackingStore(reinterpret_cast<unsigned char *>(P)),
+ AlignedNextBlock(reinterpret_cast<unsigned char *>(P)),
+ AllocatedBlocks(0),
+ Owned(false),
+ Mutex() {}
+
+ Allocator(const Allocator &) = delete;
+ Allocator &operator=(const Allocator &) = delete;
+
+ Allocator(Allocator &&O) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock L0(&Mutex);
+ SpinMutexLock L1(&O.Mutex);
+ MaxMemory = O.MaxMemory;
+ O.MaxMemory = 0;
+ BackingStore = O.BackingStore;
+ O.BackingStore = nullptr;
+ AlignedNextBlock = O.AlignedNextBlock;
+ O.AlignedNextBlock = nullptr;
+ AllocatedBlocks = O.AllocatedBlocks;
+ O.AllocatedBlocks = 0;
+ Owned = O.Owned;
+ O.Owned = false;
+ }
+
+ Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock L0(&Mutex);
+ SpinMutexLock L1(&O.Mutex);
+    // Release our own backing store, sized by our old MaxMemory, before
+    // adopting the other allocator's state.
+    if (Owned && BackingStore != nullptr)
+      deallocateBuffer(BackingStore, MaxMemory);
+    MaxMemory = O.MaxMemory;
+    O.MaxMemory = 0;
+ BackingStore = O.BackingStore;
+ O.BackingStore = nullptr;
+ AlignedNextBlock = O.AlignedNextBlock;
+ O.AlignedNextBlock = nullptr;
+ AllocatedBlocks = O.AllocatedBlocks;
+ O.AllocatedBlocks = 0;
+ Owned = O.Owned;
+ O.Owned = false;
+ return *this;
+ }
+
+ Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; }
+
+ ~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT {
+ if (Owned && BackingStore != nullptr) {
+ deallocateBuffer(BackingStore, MaxMemory);
+ }
+ }
+};
+
+} // namespace __xray
+
+#endif // XRAY_ALLOCATOR_H
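
A hypothetical usage sketch of the Allocator above, with invented names: blocks come back cache-line aligned and sized, serve as placement-new targets, and are never freed individually; the whole arena is released when the Allocator is destroyed.

#include "xray_allocator.h"
#include <cstdint>
#include <new>

namespace {
struct Node {
  int32_t FuncId;
  uint64_t TSC;
};

void allocatorExample() {
  // Arena capped at 1 MiB; each Block is sizeof(Node) rounded up to a
  // cache-line multiple.
  __xray::Allocator<sizeof(Node)> A(1 << 20);
  auto B = A.Allocate();
  if (B.Data == nullptr)
    return; // Budget exhausted or the backing mmap failed.
  auto *N = new (B.Data) Node{1, 0};
  (void)N; // No per-block free; ~Allocator() unmaps the whole arena.
}
} // namespace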
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt b/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt
new file mode 100644
index 000000000000..151ed703dd56
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt
@@ -0,0 +1,6 @@
+# List of function matchers common to C/C++ applications that make sense to
+# always instrument. You can use this as an argument to
+# -fxray-always-instrument=<path> along with your project-specific lists.
+
+# Always instrument the main function.
+fun:main
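
The list above matches functions by name when passed to clang via -fxray-always-instrument=<path>. The same per-function control is also available directly in source through clang's XRay attributes, for example:

// Per-function equivalents of the always/never filter lists.
[[clang::xray_always_instrument]] void alwaysTraced();
[[clang::xray_never_instrument]] void neverTraced();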
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp
new file mode 100644
index 000000000000..e1818555906c
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp
@@ -0,0 +1,164 @@
+//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of ARM-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+extern "C" void __clear_cache(void *start, void *end);
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+ PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
+ PO_BlxIp = 0xE12FFF3C, // BLX ip
+ PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr}
+ PO_B20 = 0xEA000005 // B #20
+};
+
+// 0xUUUUWXYZ -> 0x000W0XYZ
+inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return (Value & 0xfff) | ((Value & 0xf000) << 4);
+}
+
+// 0xWXYZUUUU -> 0x000W0XYZ
+inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return getMovwMask(Value >> 16);
+}
+
+// Writes the following instructions:
+// MOVW R<regNo>, #<lower 16 bits of the |Value|>
+// MOVT R<regNo>, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+write32bitLoadReg(uint8_t regNo, uint32_t *Address,
+ const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ // This is a fatal error: we cannot just report it and continue execution.
+ assert(regNo <= 15 && "Register number must be 0 to 15.");
+ // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
+ *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value));
+ Address++;
+ // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
+ *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value));
+ return Address + 1;
+}
+
+// Writes the following instructions:
+// MOVW r0, #<lower 16 bits of the |Value|>
+// MOVT r0, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+write32bitLoadR0(uint32_t *Address,
+ const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return write32bitLoadReg(0, Address, Value);
+}
+
+// Writes the following instructions:
+// MOVW ip, #<lower 16 bits of the |Value|>
+// MOVT ip, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+write32bitLoadIP(uint32_t *Address,
+ const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return write32bitLoadReg(12, Address, Value);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B #20
+ // 6 NOPs (24 bytes)
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n:
+ // PUSH {r0, lr}
+ // MOVW r0, #<lower 16 bits of function ID>
+ // MOVT r0, #<higher 16 bits of function ID>
+ // MOVW ip, #<lower 16 bits of address of TracingHook>
+ // MOVT ip, #<higher 16 bits of address of TracingHook>
+ // BLX ip
+ // POP {r0, lr}
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #20
+
+ uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address());
+ uint32_t *CurAddress = FirstAddress + 1;
+ if (Enable) {
+ CurAddress =
+ write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
+ CurAddress =
+ write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
+ *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
+ CurAddress++;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
+ }
+ __clear_cache(reinterpret_cast<char *>(FirstAddress),
+ reinterpret_cast<char *>(CurAddress));
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled)
+ XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm?
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in arm?
+ return false;
+}
+
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
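
A self-contained check of the MOVW mask arithmetic used above: the 16-bit immediate is scattered into the instruction's imm4 (bits 19:16) and imm12 (bits 11:0) fields, matching the 0xE30WRXYZ layout described in the comments. This is an illustration only, not part of the runtime:

#include <cassert>
#include <cstdint>

// Same arithmetic as getMovwMask() above: 0x0000WXYZ -> 0x000W0XYZ.
static uint32_t movwMask(uint32_t Value) {
  return (Value & 0xfff) | ((Value & 0xf000) << 4);
}

int main() {
  uint32_t Insn = 0xE3000000u | (0u << 12) | movwMask(0x1234);
  assert(Insn == 0xE3010234u); // MOVW r0, #0x1234
  return 0;
}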
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp
new file mode 100644
index 000000000000..e0a5e7bb29ee
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp
@@ -0,0 +1,49 @@
+//===-- xray_basic_flags.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay Basic flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_basic_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+/// Use via basicFlags().
+BasicFlags xray_basic_flags_dont_use_directly;
+
+void BasicFlags::setDefaults() XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_basic_flags.inc"
+#undef XRAY_FLAG
+}
+
+void registerXRayBasicFlags(FlagParser *P,
+ BasicFlags *F) XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) \
+ RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_basic_flags.inc"
+#undef XRAY_FLAG
+}
+
+const char *useCompilerDefinedBasicFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_BASIC_OPTIONS
+ return SANITIZER_STRINGIFY(XRAY_BASIC_OPTIONS);
+#else
+ return "";
+#endif
+}
+
+} // namespace __xray
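
The XRAY_FLAG pattern used above is an X-macro: xray_basic_flags.inc is included once per context with a different XRAY_FLAG definition, so the flag list is written once and expanded into member declarations, default assignments, and parser registrations. A minimal self-contained illustration of the idea, using a list macro instead of an #include and hypothetical flag names:

#include <cassert>

#define DEMO_FLAGS(X)                                                          \
  X(int, func_duration_threshold_us, 5)                                        \
  X(int, max_stack_depth, 64)

struct DemoFlags {
#define DEMO_FLAG(Type, Name, Default) Type Name;
  DEMO_FLAGS(DEMO_FLAG)
#undef DEMO_FLAG

  void setDefaults() {
#define DEMO_FLAG(Type, Name, Default) Name = Default;
    DEMO_FLAGS(DEMO_FLAG)
#undef DEMO_FLAG
  }
};

int main() {
  DemoFlags F;
  F.setDefaults();
  assert(F.func_duration_threshold_us == 5 && F.max_stack_depth == 64);
  return 0;
}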
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h
new file mode 100644
index 000000000000..2459effa8bae
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h
@@ -0,0 +1,37 @@
+//===-- xray_basic_flags.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay Basic Mode runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_BASIC_FLAGS_H
+#define XRAY_BASIC_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+struct BasicFlags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_basic_flags.inc"
+#undef XRAY_FLAG
+
+ void setDefaults();
+};
+
+extern BasicFlags xray_basic_flags_dont_use_directly;
+extern void registerXRayBasicFlags(FlagParser *P, BasicFlags *F);
+const char *useCompilerDefinedBasicFlags();
+inline BasicFlags *basicFlags() { return &xray_basic_flags_dont_use_directly; }
+
+} // namespace __xray
+
+#endif // XRAY_BASIC_FLAGS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc
new file mode 100644
index 000000000000..fb38c540d356
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc
@@ -0,0 +1,23 @@
+//===-- xray_basic_flags.inc ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(int, func_duration_threshold_us, 5,
+ "Basic logging will try to skip functions that execute for fewer "
+ "microseconds than this threshold.")
+XRAY_FLAG(int, max_stack_depth, 64,
+ "Basic logging will keep track of at most this deep a call stack, "
+ "any more and the recordings will be dropped.")
+XRAY_FLAG(int, thread_buffer_size, 1024,
+ "The number of entries to keep on a per-thread buffer.")
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp
new file mode 100644
index 000000000000..6e8e93131451
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp
@@ -0,0 +1,515 @@
+//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of a simple in-memory log of XRay events. This defines a
+// logging function that's compatible with the XRay handler interface, and
+// routines for exporting data to files.
+//
+//===----------------------------------------------------------------------===//
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
+#include <sys/syscall.h>
+#endif
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray/xray_records.h"
+#include "xray_recursion_guard.h"
+#include "xray_basic_flags.h"
+#include "xray_basic_logging.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+
+namespace __xray {
+
+static SpinMutex LogMutex;
+
+namespace {
+// We use elements of this type to record the entry TSC of every function ID we
+// see as we're tracing a particular thread's execution.
+struct alignas(16) StackEntry {
+ int32_t FuncId;
+ uint16_t Type;
+ uint8_t CPU;
+ uint8_t Padding;
+ uint64_t TSC;
+};
+
+static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
+
+struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
+ void *InMemoryBuffer = nullptr;
+ size_t BufferSize = 0;
+ size_t BufferOffset = 0;
+ void *ShadowStack = nullptr;
+ size_t StackSize = 0;
+ size_t StackEntries = 0;
+ __xray::LogWriter *LogWriter = nullptr;
+};
+
+struct BasicLoggingOptions {
+ int DurationFilterMicros = 0;
+ size_t MaxStackDepth = 0;
+ size_t ThreadBufferSize = 0;
+};
+} // namespace
+
+static pthread_key_t PThreadKey;
+
+static atomic_uint8_t BasicInitialized{0};
+
+struct BasicLoggingOptions GlobalOptions;
+
+thread_local atomic_uint8_t Guard{0};
+
+static atomic_uint8_t UseRealTSC{0};
+static atomic_uint64_t ThresholdTicks{0};
+static atomic_uint64_t TicksPerSec{0};
+static atomic_uint64_t CycleFrequency{NanosecondsPerSecond};
+
+static LogWriter *getLog() XRAY_NEVER_INSTRUMENT {
+ LogWriter* LW = LogWriter::Open();
+ if (LW == nullptr)
+ return LW;
+
+ static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
+ pthread_once(&DetectOnce, +[] {
+ if (atomic_load(&UseRealTSC, memory_order_acquire))
+ atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release);
+ });
+
+ // Since we're here, we get to write the header. We set it up so that the
+ // header will only be written once, at the start, and let the threads
+ // logging do writes which just append.
+ XRayFileHeader Header;
+ // Version 2 includes tail exit records.
+ // Version 3 includes pid inside records.
+ Header.Version = 3;
+ Header.Type = FileTypes::NAIVE_LOG;
+ Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire);
+
+ // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+ // before setting the values in the header.
+ Header.ConstantTSC = 1;
+ Header.NonstopTSC = 1;
+ LW->WriteAll(reinterpret_cast<char *>(&Header),
+ reinterpret_cast<char *>(&Header) + sizeof(Header));
+ return LW;
+}
+
+static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT {
+ static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
+ static LogWriter *LW = nullptr;
+ pthread_once(&OnceInit, +[] { LW = getLog(); });
+ return LW;
+}
+
+static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
+ thread_local ThreadLocalData TLD;
+ thread_local bool UNUSED TOnce = [] {
+ if (GlobalOptions.ThreadBufferSize == 0) {
+ if (Verbosity())
+ Report("Not initializing TLD since ThreadBufferSize == 0.\n");
+ return false;
+ }
+ pthread_setspecific(PThreadKey, &TLD);
+ TLD.LogWriter = getGlobalLog();
+ TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
+ InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
+ nullptr, alignof(XRayRecord)));
+ TLD.BufferSize = GlobalOptions.ThreadBufferSize;
+ TLD.BufferOffset = 0;
+ if (GlobalOptions.MaxStackDepth == 0) {
+ if (Verbosity())
+ Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
+ TLD.StackSize = 0;
+ TLD.StackEntries = 0;
+ TLD.ShadowStack = nullptr;
+ return false;
+ }
+ TLD.ShadowStack = reinterpret_cast<StackEntry *>(
+ InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
+ alignof(StackEntry)));
+ TLD.StackSize = GlobalOptions.MaxStackDepth;
+ TLD.StackEntries = 0;
+ return false;
+ }();
+ return TLD;
+}
+
+template <class RDTSC>
+void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
+ RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
+ LogWriter *LW = getGlobalLog();
+ if (LW == nullptr)
+ return;
+
+ // Use a simple recursion guard, to handle cases where we're already logging
+ // and for one reason or another, this function gets called again in the same
+ // thread.
+ RecursionGuard G(Guard);
+ if (!G)
+ return;
+
+ uint8_t CPU = 0;
+ uint64_t TSC = ReadTSC(CPU);
+
+ switch (Type) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY: {
+ // Short circuit if we've reached the maximum depth of the stack.
+ if (TLD.StackEntries++ >= TLD.StackSize)
+ return;
+
+ // When we encounter an entry event, we keep track of the TSC and the CPU,
+ // and put it in the stack.
+ StackEntry E;
+ E.FuncId = FuncId;
+ E.CPU = CPU;
+ E.Type = Type;
+ E.TSC = TSC;
+ auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
+ (sizeof(StackEntry) * (TLD.StackEntries - 1));
+ internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
+ break;
+ }
+ case XRayEntryType::EXIT:
+ case XRayEntryType::TAIL: {
+ if (TLD.StackEntries == 0)
+ break;
+
+ if (--TLD.StackEntries >= TLD.StackSize)
+ return;
+
+ // When we encounter an exit event, we check whether all the following are
+ // true:
+ //
+ // - The Function ID is the same as the most recent entry in the stack.
+ // - The CPU is the same as the most recent entry in the stack.
+ // - The Delta of the TSCs is less than the threshold amount of time we're
+ // looking to record.
+ //
+ // If all of these conditions are true, we pop the stack and don't write a
+ // record and move the record offset back.
+ StackEntry StackTop;
+ auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
+ (sizeof(StackEntry) * TLD.StackEntries);
+ internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
+ if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
+ StackTop.TSC < TSC) {
+ auto Delta = TSC - StackTop.TSC;
+ if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) {
+ DCHECK(TLD.BufferOffset > 0);
+ TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
+ return;
+ }
+ }
+ break;
+ }
+ default:
+ // Should be unreachable.
+ DCHECK(false && "Unsupported XRayEntryType encountered.");
+ break;
+ }
+
+ // First determine whether the delta between the function's enter record and
+ // the exit record is higher than the threshold.
+ XRayRecord R;
+ R.RecordType = RecordTypes::NORMAL;
+ R.CPU = CPU;
+ R.TSC = TSC;
+ R.TId = GetTid();
+ R.PId = internal_getpid();
+ R.Type = Type;
+ R.FuncId = FuncId;
+ auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
+ internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
+ if (++TLD.BufferOffset == TLD.BufferSize) {
+ SpinMutexLock Lock(&LogMutex);
+ LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
+ reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
+ TLD.BufferOffset = 0;
+ TLD.StackEntries = 0;
+ }
+}
+
+template <class RDTSC>
+void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
+ RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
+ auto FirstEntry =
+ reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
+ const auto &BuffLen = TLD.BufferSize;
+ LogWriter *LW = getGlobalLog();
+ if (LW == nullptr)
+ return;
+
+ // First we check whether there's enough space to write the data consecutively
+ // in the thread-local buffer. If not, we first flush the buffer before
+ // attempting to write the two records that must be consecutive.
+ if (TLD.BufferOffset + 2 > BuffLen) {
+ SpinMutexLock Lock(&LogMutex);
+ LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
+ reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
+ TLD.BufferOffset = 0;
+ TLD.StackEntries = 0;
+ }
+
+ // Then we write the "we have an argument" record.
+ InMemoryRawLog(FuncId, Type, ReadTSC);
+
+ RecursionGuard G(Guard);
+ if (!G)
+ return;
+
+ // And, from here on write the arg payload.
+ XRayArgPayload R;
+ R.RecordType = RecordTypes::ARG_PAYLOAD;
+ R.FuncId = FuncId;
+ R.TId = GetTid();
+ R.PId = internal_getpid();
+ R.Arg = Arg1;
+ internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
+ if (++TLD.BufferOffset == BuffLen) {
+ SpinMutexLock Lock(&LogMutex);
+ LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
+ reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
+ TLD.BufferOffset = 0;
+ TLD.StackEntries = 0;
+ }
+}
+
+void basicLoggingHandleArg0RealTSC(int32_t FuncId,
+ XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLog(FuncId, Type, readTSC);
+}
+
+void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
+ XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
+ TS = {0, 0};
+ }
+ CPU = 0;
+ return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+ });
+}
+
+void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
+ uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC);
+}
+
+void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
+ uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLogWithArg(
+ FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+          Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+ TS = {0, 0};
+ }
+ CPU = 0;
+ return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+ });
+}
+
+static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
+ ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
+ auto ExitGuard = at_scope_exit([&TLD] {
+ // Clean up dynamic resources.
+ if (TLD.InMemoryBuffer)
+ InternalFree(TLD.InMemoryBuffer);
+ if (TLD.ShadowStack)
+ InternalFree(TLD.ShadowStack);
+ if (Verbosity())
+ Report("Cleaned up log for TID: %d\n", GetTid());
+ });
+
+ if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) {
+ if (Verbosity())
+ Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(),
+ TLD.BufferOffset);
+ return;
+ }
+
+ {
+ SpinMutexLock L(&LogMutex);
+ TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer),
+ reinterpret_cast<char *>(TLD.InMemoryBuffer) +
+ (sizeof(XRayRecord) * TLD.BufferOffset));
+ }
+
+ // Because this thread's exit could be the last one trying to write to
+ // the file and that we're not able to close out the file properly, we
+ // sync instead and hope that the pending writes are flushed as the
+ // thread exits.
+ TLD.LogWriter->Flush();
+}
+
+XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
+ UNUSED size_t BufferMax, void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ uint8_t Expected = 0;
+ if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
+ memory_order_acq_rel)) {
+ if (Verbosity())
+ Report("Basic logging already initialized.\n");
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+ }
+
+ static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
+ pthread_once(&OnceInit, +[] {
+ pthread_key_create(&PThreadKey, TLDDestructor);
+ atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release);
+ // Initialize the global TicksPerSec value.
+ atomic_store(&TicksPerSec,
+ probeRequiredCPUFeatures() ? getTSCFrequency()
+ : NanosecondsPerSecond,
+ memory_order_release);
+ if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity())
+ Report("WARNING: Required CPU features missing for XRay instrumentation, "
+ "using emulation instead.\n");
+ });
+
+ FlagParser P;
+ BasicFlags F;
+ F.setDefaults();
+ registerXRayBasicFlags(&P, &F);
+ P.ParseString(useCompilerDefinedBasicFlags());
+ auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS");
+ if (EnvOpts == nullptr)
+ EnvOpts = "";
+
+ P.ParseString(EnvOpts);
+
+ // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
+ // set through XRAY_OPTIONS instead.
+ if (internal_strlen(EnvOpts) == 0) {
+ F.func_duration_threshold_us =
+ flags()->xray_naive_log_func_duration_threshold_us;
+ F.max_stack_depth = flags()->xray_naive_log_max_stack_depth;
+ F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size;
+ }
+
+ P.ParseString(static_cast<const char *>(Options));
+ GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
+ GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
+ GlobalOptions.MaxStackDepth = F.max_stack_depth;
+ *basicFlags() = F;
+
+ atomic_store(&ThresholdTicks,
+ atomic_load(&TicksPerSec, memory_order_acquire) *
+ GlobalOptions.DurationFilterMicros / 1000000,
+ memory_order_release);
+ __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire)
+ ? basicLoggingHandleArg1RealTSC
+ : basicLoggingHandleArg1EmulateTSC);
+ __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire)
+ ? basicLoggingHandleArg0RealTSC
+ : basicLoggingHandleArg0EmulateTSC);
+
+ // TODO: Implement custom event and typed event handling support in Basic
+ // Mode.
+ __xray_remove_customevent_handler();
+ __xray_remove_typedevent_handler();
+
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
+ uint8_t Expected = 0;
+ if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
+ memory_order_acq_rel) &&
+ Verbosity())
+ Report("Basic logging already finalized.\n");
+
+ // Nothing really to do aside from marking state of the global to be
+ // uninitialized.
+
+ return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
+  // This really does nothing, since flushing the logs happens at the end of a
+ // thread's lifetime, or when the buffers are full.
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
+
+// This is a handler that, effectively, does nothing.
+void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
+}
+
+bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
+ XRayLogImpl Impl{
+ basicLoggingInit,
+ basicLoggingFinalize,
+ basicLoggingHandleArg0Empty,
+ basicLoggingFlush,
+ };
+ auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl);
+ if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
+ Verbosity())
+ Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
+ RegistrationResult);
+ if (flags()->xray_naive_log ||
+ !internal_strcmp(flags()->xray_mode, "xray-basic")) {
+ auto SelectResult = __xray_log_select_mode("xray-basic");
+ if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+ if (Verbosity())
+ Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult);
+ return false;
+ }
+
+ // We initialize the implementation using the data we get from the
+ // XRAY_BASIC_OPTIONS environment variable, at this point of the
+ // implementation.
+ auto *Env = GetEnv("XRAY_BASIC_OPTIONS");
+ auto InitResult =
+ __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env);
+ if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
+ if (Verbosity())
+ Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult);
+ return false;
+ }
+
+ // At this point we know that we've successfully initialized Basic mode
+ // tracing, and the only chance we're going to get for the current thread to
+ // clean-up may be at thread/program exit. To ensure that we're going to get
+ // the cleanup even without calling the finalization routines, we're
+ // registering a program exit function that will do the cleanup.
+ static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
+ pthread_once(&DynamicOnce, +[] {
+ static void *FakeTLD = nullptr;
+ FakeTLD = &getThreadLocalData();
+ Atexit(+[] { TLDDestructor(FakeTLD); });
+ });
+ }
+ return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
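
From the application side, Basic Mode is driven through the public log-interface API that this file registers into. A hedged end-to-end sketch, with the flag string and workload invented for illustration:

#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"

void collectBasicModeTrace() {
  // Select and initialize the mode registered above as "xray-basic".
  __xray_log_select_mode("xray-basic");
  __xray_log_init_mode("xray-basic", "func_duration_threshold_us=10");
  __xray_patch(); // Patch the sleds so the installed handlers fire.

  // ... run the workload to be traced ...

  __xray_unpatch();
  __xray_log_finalize();
  __xray_log_flushLog();
}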
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h
new file mode 100644
index 000000000000..89caca66b585
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h
@@ -0,0 +1,42 @@
+//===-- xray_basic_logging.h ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_INMEMORY_LOG_H
+#define XRAY_XRAY_INMEMORY_LOG_H
+
+#include "xray/xray_log_interface.h"
+
+/// Basic (Naive) Mode
+/// ==================
+///
+/// This implementation hooks in through the XRay logging implementation
+/// framework. The Basic Mode implementation will keep appending to a file as
+/// soon as the thread-local buffers are full. It keeps minimal in-memory state
+/// and does the minimum filtering required to keep log files smaller.
+
+namespace __xray {
+
+XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
+ void *Options, size_t OptionsSize);
+XRayLogInitStatus basicLoggingFinalize();
+
+void basicLoggingHandleArg0RealTSC(int32_t FuncId, XRayEntryType Entry);
+void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Entry);
+void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Entry,
+ uint64_t Arg1);
+void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Entry,
+ uint64_t Arg1);
+XRayLogFlushStatus basicLoggingFlush();
+XRayLogInitStatus basicLoggingReset();
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_INMEMORY_LOG_H
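
Basic Mode installs its handlers through the same public hook available to any consumer. A minimal sketch of a custom handler using __xray_set_handler() from xray/xray_interface.h; illustrative only, and as the recursion guard in xray_basic_logging.cpp shows, real handlers must stay cheap and reentrancy-safe:

#include "xray/xray_interface.h"
#include <atomic>
#include <cstdint>

static std::atomic<uint64_t> EntryCount{0};

// Invoked from every patched sled once installed.
static void countingHandler(int32_t FuncId, XRayEntryType Type) {
  (void)FuncId;
  if (Type == XRayEntryType::ENTRY)
    EntryCount.fetch_add(1, std::memory_order_relaxed);
}

void installCountingHandler() {
  __xray_set_handler(countingHandler);
  __xray_patch();
}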
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp
new file mode 100644
index 000000000000..bad91e036cef
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp
@@ -0,0 +1,237 @@
+//===-- xray_buffer_queue.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the interface for a buffer queue implementation.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_buffer_queue.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#if !SANITIZER_FUCHSIA
+#include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_allocator.h"
+#include "xray_defs.h"
+#include <memory>
+#include <sys/mman.h>
+
+using namespace __xray;
+
+namespace {
+
+BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) {
+ auto B =
+ allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count));
+ return B == nullptr ? nullptr
+ : reinterpret_cast<BufferQueue::ControlBlock *>(B);
+}
+
+void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size,
+ size_t Count) {
+ deallocateBuffer(reinterpret_cast<unsigned char *>(C),
+ (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count));
+}
+
+void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) {
+ if (C == nullptr)
+ return;
+ if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1)
+ deallocControlBlock(C, Size, Count);
+}
+
+void incRefCount(BufferQueue::ControlBlock *C) {
+ if (C == nullptr)
+ return;
+ atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel);
+}
+
+// We use a struct to ensure that we are allocating one atomic_uint64_t per
+// cache line. This allows us to not worry about false-sharing among atomic
+// objects being updated (constantly) by different threads.
+struct ExtentsPadded {
+ union {
+ atomic_uint64_t Extents;
+ unsigned char Storage[kCacheLineSize];
+ };
+};
+
+constexpr size_t kExtentsSize = sizeof(ExtentsPadded);
+
+} // namespace
+
+BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) {
+ SpinMutexLock Guard(&Mutex);
+
+ if (!finalizing())
+ return BufferQueue::ErrorCode::AlreadyInitialized;
+
+ cleanupBuffers();
+
+ bool Success = false;
+ BufferSize = BS;
+ BufferCount = BC;
+
+ BackingStore = allocControlBlock(BufferSize, BufferCount);
+ if (BackingStore == nullptr)
+ return BufferQueue::ErrorCode::NotEnoughMemory;
+
+ auto CleanupBackingStore = at_scope_exit([&, this] {
+ if (Success)
+ return;
+ deallocControlBlock(BackingStore, BufferSize, BufferCount);
+ BackingStore = nullptr;
+ });
+
+  // Initialize enough atomic_uint64_t instances, each padded out to its own
+  // cache line, to hold the extents of every buffer.
+ ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount);
+ if (ExtentsBackingStore == nullptr)
+ return BufferQueue::ErrorCode::NotEnoughMemory;
+
+ auto CleanupExtentsBackingStore = at_scope_exit([&, this] {
+ if (Success)
+ return;
+ deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount);
+ ExtentsBackingStore = nullptr;
+ });
+
+ Buffers = initArray<BufferRep>(BufferCount);
+ if (Buffers == nullptr)
+ return BufferQueue::ErrorCode::NotEnoughMemory;
+
+ // At this point we increment the generation number to associate the buffers
+ // to the new generation.
+ atomic_fetch_add(&Generation, 1, memory_order_acq_rel);
+
+ // First, we initialize the refcount in the ControlBlock, which we treat as
+ // being at the start of the BackingStore pointer.
+ atomic_store(&BackingStore->RefCount, 1, memory_order_release);
+ atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release);
+
+  // Then we initialize the individual buffers that subdivide the whole backing
+ // store. Each buffer will start at the `Data` member of the ControlBlock, and
+ // will be offsets from these locations.
+ for (size_t i = 0; i < BufferCount; ++i) {
+ auto &T = Buffers[i];
+ auto &Buf = T.Buff;
+ auto *E = reinterpret_cast<ExtentsPadded *>(&ExtentsBackingStore->Data +
+ (kExtentsSize * i));
+ Buf.Extents = &E->Extents;
+ atomic_store(Buf.Extents, 0, memory_order_release);
+ Buf.Generation = generation();
+ Buf.Data = &BackingStore->Data + (BufferSize * i);
+ Buf.Size = BufferSize;
+ Buf.BackingStore = BackingStore;
+ Buf.ExtentsBackingStore = ExtentsBackingStore;
+ Buf.Count = BufferCount;
+ T.Used = false;
+ }
+
+ Next = Buffers;
+ First = Buffers;
+ LiveBuffers = 0;
+ atomic_store(&Finalizing, 0, memory_order_release);
+ Success = true;
+ return BufferQueue::ErrorCode::Ok;
+}
+
+BufferQueue::BufferQueue(size_t B, size_t N,
+ bool &Success) XRAY_NEVER_INSTRUMENT
+ : BufferSize(B),
+ BufferCount(N),
+ Mutex(),
+ Finalizing{1},
+ BackingStore(nullptr),
+ ExtentsBackingStore(nullptr),
+ Buffers(nullptr),
+ Next(Buffers),
+ First(Buffers),
+ LiveBuffers(0),
+ Generation{0} {
+ Success = init(B, N) == BufferQueue::ErrorCode::Ok;
+}
+
+BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
+ if (atomic_load(&Finalizing, memory_order_acquire))
+ return ErrorCode::QueueFinalizing;
+
+ BufferRep *B = nullptr;
+ {
+ SpinMutexLock Guard(&Mutex);
+ if (LiveBuffers == BufferCount)
+ return ErrorCode::NotEnoughMemory;
+ B = Next++;
+ if (Next == (Buffers + BufferCount))
+ Next = Buffers;
+ ++LiveBuffers;
+ }
+
+ incRefCount(BackingStore);
+ incRefCount(ExtentsBackingStore);
+ Buf = B->Buff;
+ Buf.Generation = generation();
+ B->Used = true;
+ return ErrorCode::Ok;
+}
+
+BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
+ // Check whether the buffer being referred to is within the bounds of the
+ // backing store's range.
+ BufferRep *B = nullptr;
+ {
+ SpinMutexLock Guard(&Mutex);
+ if (Buf.Generation != generation() || LiveBuffers == 0) {
+      // Drop the references before clearing Buf; otherwise decRefCount
+      // sees null pointers and the old generation's blocks would leak.
+      decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+      decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+      Buf = {};
+ return BufferQueue::ErrorCode::Ok;
+ }
+
+ if (Buf.Data < &BackingStore->Data ||
+ Buf.Data > &BackingStore->Data + (BufferCount * BufferSize))
+ return BufferQueue::ErrorCode::UnrecognizedBuffer;
+
+ --LiveBuffers;
+ B = First++;
+ if (First == (Buffers + BufferCount))
+ First = Buffers;
+ }
+
+ // Now that the buffer has been released, we mark it as "used".
+ B->Buff = Buf;
+ B->Used = true;
+ decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+ decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+ atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire),
+ memory_order_release);
+ Buf = {};
+ return ErrorCode::Ok;
+}
+
+BufferQueue::ErrorCode BufferQueue::finalize() {
+ if (atomic_exchange(&Finalizing, 1, memory_order_acq_rel))
+ return ErrorCode::QueueFinalizing;
+ return ErrorCode::Ok;
+}
+
+void BufferQueue::cleanupBuffers() {
+ for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
+ B->~BufferRep();
+ deallocateBuffer(Buffers, BufferCount);
+ decRefCount(BackingStore, BufferSize, BufferCount);
+ decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount);
+ BackingStore = nullptr;
+ ExtentsBackingStore = nullptr;
+ Buffers = nullptr;
+ BufferCount = 0;
+ BufferSize = 0;
+}
+
+BufferQueue::~BufferQueue() { cleanupBuffers(); }
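
A hypothetical usage sketch of the queue above, following the getBuffer()/releaseBuffer() contract; buffer sizes and counts are invented for illustration:

#include "xray_buffer_queue.h"

void bufferQueueExample() {
  bool Success = false;
  // 16 buffers of 64 KiB each; Success reports whether init() worked.
  __xray::BufferQueue Q(64 * 1024, 16, Success);
  if (!Success)
    return;

  __xray::BufferQueue::Buffer Buf;
  if (Q.getBuffer(Buf) != __xray::BufferQueue::ErrorCode::Ok)
    return;

  // ... write records into Buf.Data, publishing the number of bytes
  // written through the *Buf.Extents atomic ...

  Q.releaseBuffer(Buf); // Returns the buffer and clears Buf.
  Q.finalize();         // Later getBuffer() calls fail QueueFinalizing.
}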
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h
new file mode 100644
index 000000000000..e1739d050f3d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h
@@ -0,0 +1,280 @@
+//===-- xray_buffer_queue.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the interface for a buffer queue implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_BUFFER_QUEUE_H
+#define XRAY_BUFFER_QUEUE_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "xray_defs.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace __xray {
+
+/// BufferQueue implements a circular queue of fixed sized buffers (much like a
+/// freelist) but is concerned with making it quick to initialise, finalise, and
+/// get from or return buffers to the queue. This is one key component of the
+/// "flight data recorder" (FDR) mode to support ongoing XRay function call
+/// trace collection.
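+///
+/// A minimal usage sketch (illustrative only; error handling elided):
+///
+/// bool Success = false;
+/// BufferQueue Queue(/*BufferSize=*/4096, /*BufferCount=*/8, Success);
+/// BufferQueue::Buffer Buf;
+/// if (Success && Queue.getBuffer(Buf) == BufferQueue::ErrorCode::Ok) {
+/// // ... write into Buf.Data, tracking bytes written via *Buf.Extents ...
+/// Queue.releaseBuffer(Buf);
+/// }
+/// Queue.finalize();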
+class BufferQueue {
+public:
+ /// ControlBlock represents the memory layout of how we interpret the backing
+ /// store for all buffers and extents managed by a BufferQueue instance. The
+ /// ControlBlock has the reference count as the first member, sized according
+ /// to platform-specific cache-line size. We never use the Buffer member of
+ /// the union, which is only there for compiler-supported alignment and
+ /// sizing.
+ ///
+ /// This ensures that the `Data` member will be placed at least kCacheLineSize
+ /// bytes from the beginning of the structure.
+ struct ControlBlock {
+ union {
+ atomic_uint64_t RefCount;
+ char Buffer[kCacheLineSize];
+ };
+
+ /// We need to make this size 1, to conform to the C++ rules for array data
+ /// members. When computing sizes, we subtract this 1 byte.
+ char Data[1];
+ };
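+
+ // Layout illustration: with a backing store `CB` sized for `Count` buffers
+ // of `Size` bytes each, buffer `i` begins at `&CB->Data + (Size * i)`, i.e.
+ // at least kCacheLineSize bytes past the reference count; this matches how
+ // BufferQueue::init() lays out the storage.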
+
+ struct Buffer {
+ atomic_uint64_t *Extents = nullptr;
+ uint64_t Generation{0};
+ void *Data = nullptr;
+ size_t Size = 0;
+
+ private:
+ friend class BufferQueue;
+ ControlBlock *BackingStore = nullptr;
+ ControlBlock *ExtentsBackingStore = nullptr;
+ size_t Count = 0;
+ };
+
+ struct BufferRep {
+ // The managed buffer.
+ Buffer Buff;
+
+ // This is true if the buffer has been returned to the available queue, and
+ // is considered "used" by another thread.
+ bool Used = false;
+ };
+
+private:
+ // This models a ForwardIterator. |T| must be either a `Buffer` or `const
+ // Buffer`. Note that when incrementing we only advance to "used" buffers, so
+ // that a dereference always lands on a valid element.
+ template <class T> class Iterator {
+ public:
+ BufferRep *Buffers = nullptr;
+ size_t Offset = 0;
+ size_t Max = 0;
+
+ Iterator &operator++() {
+ DCHECK_NE(Offset, Max);
+ do {
+ ++Offset;
+ } while (Offset != Max && !Buffers[Offset].Used);
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator C = *this;
+ ++(*this);
+ return C;
+ }
+
+ T &operator*() const { return Buffers[Offset].Buff; }
+
+ T *operator->() const { return &(Buffers[Offset].Buff); }
+
+ Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT
+ : Buffers(Root),
+ Offset(O),
+ Max(M) {
+ // We want to advance to the first Offset where the 'Used' property is
+ // true, or to the end of the list/queue.
+ while (Offset != Max && !Buffers[Offset].Used) {
+ ++Offset;
+ }
+ }
+
+ Iterator() = default;
+ Iterator(const Iterator &) = default;
+ Iterator(Iterator &&) = default;
+ Iterator &operator=(const Iterator &) = default;
+ Iterator &operator=(Iterator &&) = default;
+ ~Iterator() = default;
+
+ template <class V>
+ friend bool operator==(const Iterator &L, const Iterator<V> &R) {
+ DCHECK_EQ(L.Max, R.Max);
+ return L.Buffers == R.Buffers && L.Offset == R.Offset;
+ }
+
+ template <class V>
+ friend bool operator!=(const Iterator &L, const Iterator<V> &R) {
+ return !(L == R);
+ }
+ };
+
+ // Size of each individual Buffer.
+ size_t BufferSize;
+
+ // Amount of pre-allocated buffers.
+ size_t BufferCount;
+
+ SpinMutex Mutex;
+ atomic_uint8_t Finalizing;
+
+ // The collocated ControlBlock and buffer storage.
+ ControlBlock *BackingStore;
+
+ // The collocated ControlBlock and extents storage.
+ ControlBlock *ExtentsBackingStore;
+
+ // A dynamically allocated array of BufferRep instances.
+ BufferRep *Buffers;
+
+ // Pointer to the next buffer to be handed out.
+ BufferRep *Next;
+
+ // Pointer to the entry in the array where the next released buffer will be
+ // placed.
+ BufferRep *First;
+
+ // Count of buffers that have been handed out through 'getBuffer'.
+ size_t LiveBuffers;
+
+ // We use a generation number to identify the lifetime of the underlying
+ // storage, so that buffers handed out in a previous generation can be
+ // detected and rejected.
+ atomic_uint64_t Generation;
+
+ /// Releases references to the buffers backed by the current buffer queue.
+ void cleanupBuffers();
+
+public:
+ enum class ErrorCode : unsigned {
+ Ok,
+ NotEnoughMemory,
+ QueueFinalizing,
+ UnrecognizedBuffer,
+ AlreadyFinalized,
+ AlreadyInitialized,
+ };
+
+ static const char *getErrorString(ErrorCode E) {
+ switch (E) {
+ case ErrorCode::Ok:
+ return "(none)";
+ case ErrorCode::NotEnoughMemory:
+ return "no available buffers in the queue";
+ case ErrorCode::QueueFinalizing:
+ return "queue already finalizing";
+ case ErrorCode::UnrecognizedBuffer:
+ return "buffer being returned not owned by buffer queue";
+ case ErrorCode::AlreadyFinalized:
+ return "queue already finalized";
+ case ErrorCode::AlreadyInitialized:
+ return "queue already initialized";
+ }
+ return "unknown error";
+ }
+
+ /// Initialise a queue of size |N| with buffers of size |B|. We report success
+ /// through |Success|.
+ BufferQueue(size_t B, size_t N, bool &Success);
+
+ /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an
+ /// error if there are no available buffers, i.e. when handing out another
+ /// buffer would exceed the configured upper bound.
+ ///
+ /// Requirements:
+ /// - BufferQueue is not finalising.
+ ///
+ /// Returns:
+ /// - ErrorCode::NotEnoughMemory on exceeding MaxSize.
+ /// - ErrorCode::Ok when we find a Buffer.
+ /// - ErrorCode::QueueFinalizing or ErrorCode::AlreadyFinalized on
+ /// a finalizing/finalized BufferQueue.
+ ErrorCode getBuffer(Buffer &Buf);
+
+ /// Updates |Buf| to point to nullptr, with size 0.
+ ///
+ /// Returns:
+ /// - ErrorCode::Ok when we successfully release the buffer.
+ /// - ErrorCode::UnrecognizedBuffer for when this BufferQueue does not own
+ /// the buffer being released.
+ ErrorCode releaseBuffer(Buffer &Buf);
+
+ /// Initializes the buffer queue, starting a new generation. The buffer size
+ /// may be re-set with |BS| and the buffer count with |BC|.
+ ///
+ /// Returns:
+ /// - ErrorCode::Ok when we successfully initialize the buffer. This
+ /// requires that the buffer queue is previously finalized.
+ /// - ErrorCode::AlreadyInitialized when the buffer queue is not finalized.
+ ErrorCode init(size_t BS, size_t BC);
+
+ bool finalizing() const {
+ return atomic_load(&Finalizing, memory_order_acquire);
+ }
+
+ uint64_t generation() const {
+ return atomic_load(&Generation, memory_order_acquire);
+ }
+
+ /// Returns the configured size of the buffers in the buffer queue.
+ size_t ConfiguredBufferSize() const { return BufferSize; }
+
+ /// Sets the state of the BufferQueue to finalizing, which ensures that:
+ ///
+ /// - All subsequent attempts to retrieve a Buffer will fail.
+ /// - All releaseBuffer operations will still succeed.
+ ///
+ /// After a call to finalize succeeds, all subsequent calls to finalize will
+ /// fail with ErrorCode::QueueFinalizing.
+ ErrorCode finalize();
+
+ /// Applies the provided function F to each Buffer in the queue, only if the
+ /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
+ /// releaseBuffer(...) operation).
+ template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock G(&Mutex);
+ for (auto I = begin(), E = end(); I != E; ++I)
+ Fn(*I);
+ }
+
+ using const_iterator = Iterator<const Buffer>;
+ using iterator = Iterator<Buffer>;
+
+ /// Provides iterator access to the raw Buffer instances.
+ iterator begin() const { return iterator(Buffers, 0, BufferCount); }
+ const_iterator cbegin() const {
+ return const_iterator(Buffers, 0, BufferCount);
+ }
+ iterator end() const { return iterator(Buffers, BufferCount, BufferCount); }
+ const_iterator cend() const {
+ return const_iterator(Buffers, BufferCount, BufferCount);
+ }
+
+ // Cleans up allocated buffers.
+ ~BufferQueue();
+};
+
+} // namespace __xray
+
+#endif // XRAY_BUFFER_QUEUE_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h
new file mode 100644
index 000000000000..2da03c3c3451
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h
@@ -0,0 +1,31 @@
+//===-- xray_defs.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Common definitions useful for XRay sources.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_DEFS_H
+#define XRAY_XRAY_DEFS_H
+
+#if XRAY_SUPPORTED
+#define XRAY_NEVER_INSTRUMENT __attribute__((xray_never_instrument))
+#else
+#define XRAY_NEVER_INSTRUMENT
+#endif
+
+#if SANITIZER_NETBSD
+// NetBSD: thread_local is not aligned properly, and the code relying
+// on it segfaults
+#define XRAY_TLS_ALIGNAS(x)
+#define XRAY_HAS_TLS_ALIGNAS 0
+#else
+#define XRAY_TLS_ALIGNAS(x) alignas(x)
+#define XRAY_HAS_TLS_ALIGNAS 1
+#endif
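+
+// Usage sketch for the macros above (illustrative): a cache-line-aligned
+// thread-local structure that degrades to an unaligned declaration on NetBSD,
+// where over-aligned TLS is unreliable:
+//
+// struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { /* ... */ };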
+
+#endif // XRAY_XRAY_DEFS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h
new file mode 100644
index 000000000000..28a3546caa7b
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h
@@ -0,0 +1,372 @@
+//===-- xray_fdr_controller.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_
+#define COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_
+
+#include <limits>
+#include <time.h>
+
+#include "xray/xray_interface.h"
+#include "xray/xray_records.h"
+#include "xray_buffer_queue.h"
+#include "xray_fdr_log_writer.h"
+
+namespace __xray {
+
+template <size_t Version = 5> class FDRController {
+ BufferQueue *BQ;
+ BufferQueue::Buffer &B;
+ FDRLogWriter &W;
+ int (*WallClockReader)(clockid_t, struct timespec *) = nullptr;
+ uint64_t CycleThreshold = 0;
+
+ uint64_t LastFunctionEntryTSC = 0;
+ uint64_t LatestTSC = 0;
+ uint16_t LatestCPU = 0;
+ tid_t TId = 0;
+ pid_t PId = 0;
+ bool First = true;
+
+ uint32_t UndoableFunctionEnters = 0;
+ uint32_t UndoableTailExits = 0;
+
+ bool finalized() const XRAY_NEVER_INSTRUMENT {
+ return BQ == nullptr || BQ->finalizing();
+ }
+
+ bool hasSpace(size_t S) XRAY_NEVER_INSTRUMENT {
+ return B.Data != nullptr && B.Generation == BQ->generation() &&
+ W.getNextRecord() + S <= reinterpret_cast<char *>(B.Data) + B.Size;
+ }
+
+ constexpr int32_t mask(int32_t FuncId) const XRAY_NEVER_INSTRUMENT {
+ return FuncId & ((1 << 29) - 1);
+ }
+
+ bool getNewBuffer() XRAY_NEVER_INSTRUMENT {
+ if (BQ->getBuffer(B) != BufferQueue::ErrorCode::Ok)
+ return false;
+
+ W.resetRecord();
+ DCHECK_EQ(W.getNextRecord(), B.Data);
+ LatestTSC = 0;
+ LatestCPU = 0;
+ First = true;
+ UndoableFunctionEnters = 0;
+ UndoableTailExits = 0;
+ atomic_store(B.Extents, 0, memory_order_release);
+ return true;
+ }
+
+ bool setupNewBuffer() XRAY_NEVER_INSTRUMENT {
+ if (finalized())
+ return false;
+
+ DCHECK(hasSpace(sizeof(MetadataRecord) * 3));
+ TId = GetTid();
+ PId = internal_getpid();
+ struct timespec TS{0, 0};
+ WallClockReader(CLOCK_MONOTONIC, &TS);
+
+ MetadataRecord Metadata[] = {
+ // Write out a MetadataRecord to signify that this is the start of a new
+ // buffer, associated with a particular thread, with a new CPU. For the
+ // data, we have 15 bytes to squeeze as much information as we can. At
+ // this point we only write down the following bytes:
+ // - Thread ID (tid_t, cast to a 4-byte type; Darwin's tid_t is 8
+ // bytes wide)
+ createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>(
+ static_cast<int32_t>(TId)),
+
+ // Also write the WalltimeMarker record. We only really need microsecond
+ // precision here, so across platforms we encode 64-bit seconds and
+ // 32-bit microseconds in the Metadata record.
+ createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>(
+ static_cast<int64_t>(TS.tv_sec),
+ static_cast<int32_t>(TS.tv_nsec / 1000)),
+
+ // Also write the Pid record.
+ createMetadataRecord<MetadataRecord::RecordKinds::Pid>(
+ static_cast<int32_t>(PId)),
+ };
+
+ if (finalized())
+ return false;
+ return W.writeMetadataRecords(Metadata);
+ }
+
+ bool prepareBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+ if (finalized())
+ return returnBuffer();
+
+ if (UNLIKELY(!hasSpace(S))) {
+ if (!returnBuffer())
+ return false;
+ if (!getNewBuffer())
+ return false;
+ if (!setupNewBuffer())
+ return false;
+ }
+
+ if (First) {
+ First = false;
+ W.resetRecord();
+ atomic_store(B.Extents, 0, memory_order_release);
+ return setupNewBuffer();
+ }
+
+ return true;
+ }
+
+ bool returnBuffer() XRAY_NEVER_INSTRUMENT {
+ if (BQ == nullptr)
+ return false;
+
+ First = true;
+ if (finalized()) {
+ BQ->releaseBuffer(B); // ignore result.
+ return false;
+ }
+
+ return BQ->releaseBuffer(B) == BufferQueue::ErrorCode::Ok;
+ }
+
+ enum class PreambleResult { NoChange, WroteMetadata, InvalidBuffer };
+ PreambleResult recordPreamble(uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ if (UNLIKELY(LatestCPU != CPU || LatestTSC == 0)) {
+ // We update our internal tracking state for the Latest TSC and CPU we've
+ // seen, then write out the appropriate metadata and function records.
+ LatestTSC = TSC;
+ LatestCPU = CPU;
+
+ if (B.Generation != BQ->generation())
+ return PreambleResult::InvalidBuffer;
+
+ W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC);
+ return PreambleResult::WroteMetadata;
+ }
+
+ DCHECK_EQ(LatestCPU, CPU);
+
+ if (UNLIKELY(LatestTSC > TSC ||
+ TSC - LatestTSC >
+ uint64_t{std::numeric_limits<int32_t>::max()})) {
+ // Either the TSC has wrapped around from the last TSC we've seen or the
+ // delta is too large to fit in a 32-bit signed integer, so we write a
+ // wrap-around record.
+ LatestTSC = TSC;
+
+ if (B.Generation != BQ->generation())
+ return PreambleResult::InvalidBuffer;
+
+ W.writeMetadata<MetadataRecord::RecordKinds::TSCWrap>(TSC);
+ return PreambleResult::WroteMetadata;
+ }
+
+ return PreambleResult::NoChange;
+ }
+
+ bool rewindRecords(int32_t FuncId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ // Undo one enter record, because at this point we are in one of two
+ // states:
+ // - We are exiting a function that we recently entered.
+ // - We are exiting a function reached through a sequence of tail exits,
+ // in which case we check whether the tail exits can also be rewound.
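+ //
+ // For example (illustrative): an `enter f` immediately followed by this
+ // `exit f`, both within CycleThreshold, leaves no records once the enter
+ // is undone; similarly, the loop below erases short tail-exit/enter pairs
+ // that complete under the threshold.
+ //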
+ FunctionRecord F;
+ W.undoWrites(sizeof(FunctionRecord));
+ if (B.Generation != BQ->generation())
+ return false;
+ internal_memcpy(&F, W.getNextRecord(), sizeof(FunctionRecord));
+
+ DCHECK(F.RecordKind ==
+ uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+ "Expected to find a function entry record when rewinding.");
+ DCHECK_EQ(F.FuncId, FuncId & ~(0x0F << 28));
+
+ LatestTSC -= F.TSCDelta;
+ if (--UndoableFunctionEnters != 0) {
+ LastFunctionEntryTSC -= F.TSCDelta;
+ return true;
+ }
+
+ LastFunctionEntryTSC = 0;
+ auto RewindingTSC = LatestTSC;
+ auto RewindingRecordPtr = W.getNextRecord() - sizeof(FunctionRecord);
+ while (UndoableTailExits) {
+ if (B.Generation != BQ->generation())
+ return false;
+ internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord));
+ DCHECK_EQ(F.RecordKind,
+ uint8_t(FunctionRecord::RecordKinds::FunctionTailExit));
+ RewindingTSC -= F.TSCDelta;
+ RewindingRecordPtr -= sizeof(FunctionRecord);
+ if (B.Generation != BQ->generation())
+ return false;
+ internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord));
+
+ // This tail call exceeded the threshold duration. It will not be erased.
+ if ((TSC - RewindingTSC) >= CycleThreshold) {
+ UndoableTailExits = 0;
+ return true;
+ }
+
+ --UndoableTailExits;
+ W.undoWrites(sizeof(FunctionRecord) * 2);
+ LatestTSC = RewindingTSC;
+ }
+ return true;
+ }
+
+public:
+ template <class WallClockFunc>
+ FDRController(BufferQueue *BQ, BufferQueue::Buffer &B, FDRLogWriter &W,
+ WallClockFunc R, uint64_t C) XRAY_NEVER_INSTRUMENT
+ : BQ(BQ),
+ B(B),
+ W(W),
+ WallClockReader(R),
+ CycleThreshold(C) {}
+
+ bool functionEnter(int32_t FuncId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ if (finalized() ||
+ !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord)))
+ return returnBuffer();
+
+ auto PreambleStatus = recordPreamble(TSC, CPU);
+ if (PreambleStatus == PreambleResult::InvalidBuffer)
+ return returnBuffer();
+
+ if (PreambleStatus == PreambleResult::WroteMetadata) {
+ UndoableFunctionEnters = 1;
+ UndoableTailExits = 0;
+ } else {
+ ++UndoableFunctionEnters;
+ }
+
+ auto Delta = TSC - LatestTSC;
+ LastFunctionEntryTSC = TSC;
+ LatestTSC = TSC;
+ return W.writeFunction(FDRLogWriter::FunctionRecordKind::Enter,
+ mask(FuncId), Delta);
+ }
+
+ bool functionTailExit(int32_t FuncId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ if (finalized())
+ return returnBuffer();
+
+ if (!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord)))
+ return returnBuffer();
+
+ auto PreambleStatus = recordPreamble(TSC, CPU);
+ if (PreambleStatus == PreambleResult::InvalidBuffer)
+ return returnBuffer();
+
+ if (PreambleStatus == PreambleResult::NoChange &&
+ UndoableFunctionEnters != 0 &&
+ TSC - LastFunctionEntryTSC < CycleThreshold)
+ return rewindRecords(FuncId, TSC, CPU);
+
+ UndoableTailExits = UndoableFunctionEnters ? UndoableTailExits + 1 : 0;
+ UndoableFunctionEnters = 0;
+ auto Delta = TSC - LatestTSC;
+ LatestTSC = TSC;
+ return W.writeFunction(FDRLogWriter::FunctionRecordKind::TailExit,
+ mask(FuncId), Delta);
+ }
+
+ bool functionEnterArg(int32_t FuncId, uint64_t TSC, uint16_t CPU,
+ uint64_t Arg) XRAY_NEVER_INSTRUMENT {
+ if (finalized() ||
+ !prepareBuffer((2 * sizeof(MetadataRecord)) + sizeof(FunctionRecord)) ||
+ recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer)
+ return returnBuffer();
+
+ auto Delta = TSC - LatestTSC;
+ LatestTSC = TSC;
+ LastFunctionEntryTSC = 0;
+ UndoableFunctionEnters = 0;
+ UndoableTailExits = 0;
+
+ return W.writeFunctionWithArg(FDRLogWriter::FunctionRecordKind::EnterArg,
+ mask(FuncId), Delta, Arg);
+ }
+
+ bool functionExit(int32_t FuncId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ if (finalized() ||
+ !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord)))
+ return returnBuffer();
+
+ auto PreambleStatus = recordPreamble(TSC, CPU);
+ if (PreambleStatus == PreambleResult::InvalidBuffer)
+ return returnBuffer();
+
+ if (PreambleStatus == PreambleResult::NoChange &&
+ UndoableFunctionEnters != 0 &&
+ TSC - LastFunctionEntryTSC < CycleThreshold)
+ return rewindRecords(FuncId, TSC, CPU);
+
+ auto Delta = TSC - LatestTSC;
+ LatestTSC = TSC;
+ UndoableFunctionEnters = 0;
+ UndoableTailExits = 0;
+ return W.writeFunction(FDRLogWriter::FunctionRecordKind::Exit, mask(FuncId),
+ Delta);
+ }
+
+ bool customEvent(uint64_t TSC, uint16_t CPU, const void *Event,
+ int32_t EventSize) XRAY_NEVER_INSTRUMENT {
+ if (finalized() ||
+ !prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) ||
+ recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer)
+ return returnBuffer();
+
+ auto Delta = TSC - LatestTSC;
+ LatestTSC = TSC;
+ UndoableFunctionEnters = 0;
+ UndoableTailExits = 0;
+ return W.writeCustomEvent(Delta, Event, EventSize);
+ }
+
+ bool typedEvent(uint64_t TSC, uint16_t CPU, uint16_t EventType,
+ const void *Event, int32_t EventSize) XRAY_NEVER_INSTRUMENT {
+ if (finalized() ||
+ !prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) ||
+ recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer)
+ return returnBuffer();
+
+ auto Delta = TSC - LatestTSC;
+ LatestTSC = TSC;
+ UndoableFunctionEnters = 0;
+ UndoableTailExits = 0;
+ return W.writeTypedEvent(Delta, EventType, Event, EventSize);
+ }
+
+ bool flush() XRAY_NEVER_INSTRUMENT {
+ if (finalized()) {
+ returnBuffer(); // ignore result.
+ return true;
+ }
+ return returnBuffer();
+ }
+};
+
+} // namespace __xray
+
+#endif // COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp
new file mode 100644
index 000000000000..272b0b7cb1f7
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp
@@ -0,0 +1,47 @@
+//===-- xray_fdr_flags.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay FDR flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_fdr_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+FDRFlags xray_fdr_flags_dont_use_directly; // use via fdrFlags().
+
+void FDRFlags::setDefaults() XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_fdr_flags.inc"
+#undef XRAY_FLAG
+}
+
+void registerXRayFDRFlags(FlagParser *P, FDRFlags *F) XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) \
+ RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_fdr_flags.inc"
+#undef XRAY_FLAG
+}
+
+const char *useCompilerDefinedFDRFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_FDR_OPTIONS
+ return SANITIZER_STRINGIFY(XRAY_FDR_OPTIONS);
+#else
+ return "";
+#endif
+}
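+
+// For instance (illustrative), building the runtime with
+// -DXRAY_FDR_OPTIONS=buffer_max=200 makes the function above return
+// "buffer_max=200", which the initialization code can then apply as the
+// compiler-provided defaults.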
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h
new file mode 100644
index 000000000000..d6f00dc48006
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h
@@ -0,0 +1,37 @@
+//===-- xray_fdr_flags.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the flags for the flight-data-recorder mode implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FDR_FLAGS_H
+#define XRAY_FDR_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+struct FDRFlags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_fdr_flags.inc"
+#undef XRAY_FLAG
+
+ void setDefaults();
+};
+
+extern FDRFlags xray_fdr_flags_dont_use_directly;
+extern void registerXRayFDRFlags(FlagParser *P, FDRFlags *F);
+const char *useCompilerDefinedFDRFlags();
+inline FDRFlags *fdrFlags() { return &xray_fdr_flags_dont_use_directly; }
+
+} // namespace __xray
+
+#endif // XRAY_FDR_FLAGS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc
new file mode 100644
index 000000000000..6082b7e78521
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc
@@ -0,0 +1,28 @@
+//===-- xray_fdr_flags.inc --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay FDR Mode runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+// FDR (Flight Data Recorder) Mode logging options.
+XRAY_FLAG(int, func_duration_threshold_us, 5,
+ "FDR logging will try to skip functions that execute for fewer "
+ "microseconds than this threshold.")
+XRAY_FLAG(int, grace_period_ms, 100,
+ "FDR logging will wait this much time in milliseconds before "
+ "actually flushing the log; this gives a chance for threads to "
+ "notice that the log has been finalized and clean up.")
+XRAY_FLAG(int, buffer_size, 16384,
+ "Size of buffers in the circular buffer queue.")
+XRAY_FLAG(int, buffer_max, 100, "Maximum number of buffers in the queue.")
+XRAY_FLAG(bool, no_file_flush, false,
+ "Set to true to not write log files by default.")
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h
new file mode 100644
index 000000000000..7a5d438314af
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h
@@ -0,0 +1,75 @@
+//===-- xray_fdr_log_records.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H
+#define XRAY_XRAY_FDR_LOG_RECORDS_H
+#include <cstdint>
+
+namespace __xray {
+
+enum class RecordType : uint8_t { Function, Metadata };
+
+// A MetadataRecord encodes the kind of record in its first byte, and has 15
+// additional bytes at the end to hold free-form data.
+struct alignas(16) MetadataRecord {
+ // A MetadataRecord must always have a type of 1.
+ /* RecordType */ uint8_t Type : 1;
+
+ // Each kind of record is represented as a 7-bit value (even though we use an
+ // unsigned 8-bit enum class to do so).
+ enum class RecordKinds : uint8_t {
+ NewBuffer,
+ EndOfBuffer,
+ NewCPUId,
+ TSCWrap,
+ WalltimeMarker,
+ CustomEventMarker,
+ CallArgument,
+ BufferExtents,
+ TypedEventMarker,
+ Pid,
+ };
+
+ // Use 7 bits to identify this record type.
+ /* RecordKinds */ uint8_t RecordKind : 7;
+ char Data[15];
+} __attribute__((packed));
+
+static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
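+
+// Layout check (illustrative): 1 bit (Type) + 7 bits (RecordKind) = 1 byte,
+// plus 15 bytes of Data, giving the 16 bytes asserted above.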
+
+struct alignas(8) FunctionRecord {
+ // A FunctionRecord must always have a type of 0.
+ /* RecordType */ uint8_t Type : 1;
+ enum class RecordKinds {
+ FunctionEnter = 0x00,
+ FunctionExit = 0x01,
+ FunctionTailExit = 0x02,
+ };
+ /* RecordKinds */ uint8_t RecordKind : 3;
+
+ // We only use 28 bits of the function ID, so that we can use as few bytes as
+ // possible. This means we only support 2^28 (268,435,456) unique function ids
+ // in a single binary.
+ int FuncId : 28;
+
+ // We use another 4 bytes to hold the delta from the previous entry's TSC.
+ // If the delta is larger than 32 bits can hold (for instance because we are
+ // running on a different CPU, whose TSC may differ), we emit a
+ // MetadataRecord before this FunctionRecord that carries the full TSC for
+ // that CPU, and keep this delta at 0.
+ uint32_t TSCDelta;
+} __attribute__((packed));
+
+static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
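+
+// Layout check (illustrative): 1 bit (Type) + 3 bits (RecordKind) + 28 bits
+// (FuncId) + 32 bits (TSCDelta) = 64 bits, giving the 8 bytes asserted above.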
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_FDR_LOG_RECORDS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h
new file mode 100644
index 000000000000..0378663c3907
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h
@@ -0,0 +1,231 @@
+//===-- xray_fdr_log_writer.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_
+#define COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_
+
+#include "xray_buffer_queue.h"
+#include "xray_fdr_log_records.h"
+#include <functional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace __xray {
+
+template <size_t Index> struct SerializerImpl {
+ template <class Tuple,
+ typename std::enable_if<
+ Index < std::tuple_size<
+ typename std::remove_reference<Tuple>::type>::value,
+ int>::type = 0>
+ static void serializeTo(char *Buffer, Tuple &&T) {
+ auto P = reinterpret_cast<const char *>(&std::get<Index>(T));
+ constexpr auto Size = sizeof(std::get<Index>(T));
+ internal_memcpy(Buffer, P, Size);
+ SerializerImpl<Index + 1>::serializeTo(Buffer + Size,
+ std::forward<Tuple>(T));
+ }
+
+ template <class Tuple,
+ typename std::enable_if<
+ Index >= std::tuple_size<typename std::remove_reference<
+ Tuple>::type>::value,
+ int>::type = 0>
+ static void serializeTo(char *, Tuple &&) {}
+};
+
+using Serializer = SerializerImpl<0>;
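+
+// For example (illustrative), Serializer::serializeTo(Buf,
+// std::make_tuple(int32_t{1}, uint64_t{2})) copies 4 + 8 = 12 bytes into Buf,
+// one tuple element at a time.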
+
+template <class Tuple, size_t Index> struct AggregateSizesImpl {
+ static constexpr size_t value =
+ sizeof(typename std::tuple_element<Index, Tuple>::type) +
+ AggregateSizesImpl<Tuple, Index - 1>::value;
+};
+
+template <class Tuple> struct AggregateSizesImpl<Tuple, 0> {
+ static constexpr size_t value =
+ sizeof(typename std::tuple_element<0, Tuple>::type);
+};
+
+template <class Tuple> struct AggregateSizes {
+ static constexpr size_t value =
+ AggregateSizesImpl<Tuple, std::tuple_size<Tuple>::value - 1>::value;
+};
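+
+// For example, AggregateSizes<std::tuple<int32_t, uint64_t>>::value == 12,
+// which is how createMetadataRecord() below bounds its payload size.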
+
+template <MetadataRecord::RecordKinds Kind, class... DataTypes>
+MetadataRecord createMetadataRecord(DataTypes &&... Ds) {
+ static_assert(AggregateSizes<std::tuple<DataTypes...>>::value <=
+ sizeof(MetadataRecord) - 1,
+ "Metadata payload longer than metadata buffer!");
+ MetadataRecord R;
+ R.Type = 1;
+ R.RecordKind = static_cast<uint8_t>(Kind);
+ Serializer::serializeTo(R.Data,
+ std::make_tuple(std::forward<DataTypes>(Ds)...));
+ return R;
+}
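+
+// A typical instantiation (cf. the TSCWrap path in the FDR controller):
+//
+// MetadataRecord R =
+// createMetadataRecord<MetadataRecord::RecordKinds::TSCWrap>(TSC);
+//
+// tags R as a TSCWrap record and serializes the 64-bit TSC into R.Data.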
+
+class FDRLogWriter {
+ BufferQueue::Buffer &Buffer;
+ char *NextRecord = nullptr;
+
+ template <class T> void writeRecord(const T &R) {
+ internal_memcpy(NextRecord, reinterpret_cast<const char *>(&R), sizeof(T));
+ NextRecord += sizeof(T);
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
+ atomic_fetch_add(Buffer.Extents, sizeof(T), memory_order_acq_rel);
+ }
+
+public:
+ explicit FDRLogWriter(BufferQueue::Buffer &B, char *P)
+ : Buffer(B), NextRecord(P) {
+ DCHECK_NE(Buffer.Data, nullptr);
+ DCHECK_NE(NextRecord, nullptr);
+ }
+
+ explicit FDRLogWriter(BufferQueue::Buffer &B)
+ : FDRLogWriter(B, static_cast<char *>(B.Data)) {}
+
+ template <MetadataRecord::RecordKinds Kind, class... Data>
+ bool writeMetadata(Data &&... Ds) {
+ // TODO: Check boundary conditions:
+ // 1) Buffer is full, and cannot handle one metadata record.
+ // 2) Buffer queue is finalising.
+ writeRecord(createMetadataRecord<Kind>(std::forward<Data>(Ds)...));
+ return true;
+ }
+
+ template <size_t N> size_t writeMetadataRecords(MetadataRecord (&Recs)[N]) {
+ constexpr auto Size = sizeof(MetadataRecord) * N;
+ internal_memcpy(NextRecord, reinterpret_cast<const char *>(Recs), Size);
+ NextRecord += Size;
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
+ atomic_fetch_add(Buffer.Extents, Size, memory_order_acq_rel);
+ return Size;
+ }
+
+ enum class FunctionRecordKind : uint8_t {
+ Enter = 0x00,
+ Exit = 0x01,
+ TailExit = 0x02,
+ EnterArg = 0x03,
+ };
+
+ bool writeFunction(FunctionRecordKind Kind, int32_t FuncId, int32_t Delta) {
+ FunctionRecord R;
+ R.Type = 0;
+ R.RecordKind = uint8_t(Kind);
+ R.FuncId = FuncId;
+ R.TSCDelta = Delta;
+ writeRecord(R);
+ return true;
+ }
+
+ bool writeFunctionWithArg(FunctionRecordKind Kind, int32_t FuncId,
+ int32_t Delta, uint64_t Arg) {
+ // We need to write the function with arg into the buffer, and then
+ // atomically update the buffer extents. This ensures that any reads
+ // synchronised on the buffer extents record will always see the writes
+ // that happen before the atomic update.
+ FunctionRecord R;
+ R.Type = 0;
+ R.RecordKind = uint8_t(Kind);
+ R.FuncId = FuncId;
+ R.TSCDelta = Delta;
+ MetadataRecord A =
+ createMetadataRecord<MetadataRecord::RecordKinds::CallArgument>(Arg);
+ NextRecord = reinterpret_cast<char *>(internal_memcpy(
+ NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) +
+ sizeof(R);
+ NextRecord = reinterpret_cast<char *>(internal_memcpy(
+ NextRecord, reinterpret_cast<char *>(&A), sizeof(A))) +
+ sizeof(A);
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
+ atomic_fetch_add(Buffer.Extents, sizeof(R) + sizeof(A),
+ memory_order_acq_rel);
+ return true;
+ }
+
+ bool writeCustomEvent(int32_t Delta, const void *Event, int32_t EventSize) {
+ // We write the metadata record and the custom event data into the buffer
+ // first, before we atomically update the extents for the buffer. This
+ // allows us to ensure that any threads reading the extents of the buffer
+ // will only ever see the full metadata and custom event payload accounted
+ // (no partial writes accounted).
+ MetadataRecord R =
+ createMetadataRecord<MetadataRecord::RecordKinds::CustomEventMarker>(
+ EventSize, Delta);
+ NextRecord = reinterpret_cast<char *>(internal_memcpy(
+ NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) +
+ sizeof(R);
+ NextRecord = reinterpret_cast<char *>(
+ internal_memcpy(NextRecord, Event, EventSize)) +
+ EventSize;
+
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
+ atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize,
+ memory_order_acq_rel);
+ return true;
+ }
+
+ bool writeTypedEvent(int32_t Delta, uint16_t EventType, const void *Event,
+ int32_t EventSize) {
+ // We do something similar when writing out typed events, see
+ // writeCustomEvent(...) above for details.
+ MetadataRecord R =
+ createMetadataRecord<MetadataRecord::RecordKinds::TypedEventMarker>(
+ EventSize, Delta, EventType);
+ NextRecord = reinterpret_cast<char *>(internal_memcpy(
+ NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) +
+ sizeof(R);
+ NextRecord = reinterpret_cast<char *>(
+ internal_memcpy(NextRecord, Event, EventSize)) +
+ EventSize;
+
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
+ atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize,
+ memory_order_acq_rel);
+ return true;
+ }
+
+ char *getNextRecord() const { return NextRecord; }
+
+ void resetRecord() {
+ NextRecord = reinterpret_cast<char *>(Buffer.Data);
+ atomic_store(Buffer.Extents, 0, memory_order_release);
+ }
+
+ void undoWrites(size_t B) {
+ DCHECK_GE(NextRecord - B, reinterpret_cast<char *>(Buffer.Data));
+ NextRecord -= B;
+ atomic_fetch_sub(Buffer.Extents, B, memory_order_acq_rel);
+ }
+
+}; // class FDRLogWriter
+
+} // namespace __xray
+
+#endif // COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp
new file mode 100644
index 000000000000..16ce483502f0
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp
@@ -0,0 +1,757 @@
+//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the Flight Data Recorder mode for XRay, where we use
+// compact structures to store records in memory as well as when writing out the
+// data to files.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_fdr_logging.h"
+#include <cassert>
+#include <errno.h>
+#include <limits>
+#include <memory>
+#include <pthread.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_records.h"
+#include "xray_allocator.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_controller.h"
+#include "xray_fdr_flags.h"
+#include "xray_fdr_log_writer.h"
+#include "xray_flags.h"
+#include "xray_recursion_guard.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+
+namespace __xray {
+
+static atomic_sint32_t LoggingStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+namespace {
+
+// Group together thread-local-data in a struct, then hide it behind a function
+// call so that it can be initialized on first use instead of as a global. We
+// force the alignment to 64 bytes for x86 cache-line alignment, as this
+// structure is used in the hot path of the implementation.
+struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
+ BufferQueue::Buffer Buffer{};
+ BufferQueue *BQ = nullptr;
+
+ using LogWriterStorage =
+ typename std::aligned_storage<sizeof(FDRLogWriter),
+ alignof(FDRLogWriter)>::type;
+
+ LogWriterStorage LWStorage;
+ FDRLogWriter *Writer = nullptr;
+
+ using ControllerStorage =
+ typename std::aligned_storage<sizeof(FDRController<>),
+ alignof(FDRController<>)>::type;
+ ControllerStorage CStorage;
+ FDRController<> *Controller = nullptr;
+};
+
+} // namespace
+
+static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
+ "ThreadLocalData must be trivially destructible");
+
+// Use a global pthread key to identify thread-local data for logging.
+static pthread_key_t Key;
+
+// Global BufferQueue.
+static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+
+// Global thresholds for function durations.
+static atomic_uint64_t ThresholdTicks{0};
+
+// Global for ticks per second.
+static atomic_uint64_t TicksPerSec{0};
+
+static atomic_sint32_t LogFlushStatus = {
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+// This function will initialize the thread-local data structure used by the FDR
+// logging implementation and return a reference to it. The implementation
+// details require a bit of care to maintain.
+//
+// First, some requirements on the implementation in general:
+//
+// - XRay handlers should not call any memory allocation routines that may
+// delegate to an instrumented implementation. This means functions like
+// malloc() and free() should not be called while instrumenting.
+//
+// - We would like to use some thread-local data initialized on first-use of
+// the XRay instrumentation. These allow us to implement unsynchronized
+// routines that access resources associated with the thread.
+//
+// The implementation here uses a few mechanisms that allow us to provide both
+// the requirements listed above. We do this by:
+//
+// 1. Using a thread-local aligned storage buffer for representing the
+// ThreadLocalData struct. This data will be uninitialized memory by
+// design.
+//
+// 2. Not requiring a thread exit handler/implementation, keeping the
+// thread-local as purely a collection of references/data that do not
+// require cleanup.
+//
+// We're doing this to avoid using a `thread_local` object that has a
+// non-trivial destructor, because the C++ runtime might call std::malloc(...)
+// to register calls to destructors. Deadlocks may arise when, for example, an
+// externally provided malloc implementation is XRay instrumented, and
+// initializing the thread-locals involves calling into malloc. A malloc
+// implementation that does global synchronization might be holding a lock for a
+// critical section, calling a function that might be XRay instrumented (and
+// thus in turn calling into malloc by virtue of registration of the
+// thread_local's destructor).
+#if XRAY_HAS_TLS_ALIGNAS
+static_assert(alignof(ThreadLocalData) >= 64,
+ "ThreadLocalData must be cache line aligned.");
+#endif
+static ThreadLocalData &getThreadLocalData() {
+ thread_local typename std::aligned_storage<
+ sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
+
+ if (pthread_getspecific(Key) == NULL) {
+ new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
+ pthread_setspecific(Key, &TLDStorage);
+ }
+
+ return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
+}
+
+static XRayFileHeader &fdrCommonHeaderInfo() {
+ static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
+ static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
+ static bool TSCSupported = true;
+ static uint64_t CycleFrequency = NanosecondsPerSecond;
+ pthread_once(
+ &OnceInit, +[] {
+ XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
+ // Version 2 of the log writes the extents of the buffer, instead of
+ // relying on an end-of-buffer record.
+ // Version 3 includes PID metadata record.
+ // Version 4 includes CPU data in the custom event records.
+ // Version 5 uses relative deltas for custom and typed event records,
+ // and removes the CPU data in custom event records (similar to how
+ // function records use deltas instead of full TSCs and rely on other
+ // metadata records for TSC wraparound and CPU migration).
+ H.Version = 5;
+ H.Type = FileTypes::FDR_LOG;
+
+ // Test for required CPU features and cache the cycle frequency
+ TSCSupported = probeRequiredCPUFeatures();
+ if (TSCSupported)
+ CycleFrequency = getTSCFrequency();
+ H.CycleFrequency = CycleFrequency;
+
+ // FIXME: Actually check whether we have 'constant_tsc' and
+ // 'nonstop_tsc' before setting the values in the header.
+ H.ConstantTSC = 1;
+ H.NonstopTSC = 1;
+ });
+ return reinterpret_cast<XRayFileHeader &>(HStorage);
+}
+
+// This is the iterator implementation, which knows how to handle FDR-mode
+// specific buffers. This is used as an implementation of the iterator function
+// needed by __xray_log_set_buffer_iterator(...). It maintains the state of the
+// buffer iteration for the currently installed FDR mode buffers. In particular:
+//
+// - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
+// then the iterator returns the header information.
+// - If the argument represents the header information ({address of header
+// info, size of the header info}) then it returns the first FDR buffer's
+// address and extents.
+// - It will keep returning the next buffer and extents as there are more
+// buffers to process. When the input represents the last buffer, it will
+// return the initial state to signal completion ({nullptr, 0}).
+//
+// See xray/xray_log_interface.h for more details on the requirements for the
+// implementations of __xray_log_set_buffer_iterator(...) and
+// __xray_log_process_buffers(...).
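+//
+// A sketch of the expected driving loop (illustrative; `consume` is a
+// hypothetical callback):
+//
+// XRayBuffer B{nullptr, 0};
+// while ((B = fdrIterator(B)).Data != nullptr)
+// consume(B.Data, B.Size); // header first, then each used buffer.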
+XRayBuffer fdrIterator(const XRayBuffer B) {
+ DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
+ DCHECK(BQ->finalizing());
+
+ if (BQ == nullptr || !BQ->finalizing()) {
+ if (Verbosity())
+ Report(
+ "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
+ return {nullptr, 0};
+ }
+
+ // We use a global scratch-pad for the header information, which only gets
+ // initialized the first time this function is called. We'll update one part
+ // of this information with some relevant data (in particular the number of
+ // buffers to expect).
+ static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
+ static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
+ pthread_once(
+ &HeaderOnce, +[] {
+ reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
+ fdrCommonHeaderInfo();
+ });
+
+ // We use a convenience alias for code referring to Header from here on out.
+ auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
+ if (B.Data == nullptr && B.Size == 0) {
+ Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
+ return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
+ }
+
+ static BufferQueue::const_iterator It{};
+ static BufferQueue::const_iterator End{};
+ static uint8_t *CurrentBuffer{nullptr};
+ static size_t SerializedBufferSize = 0;
+ if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
+ // From this point on, we provide raw access to the buffers we get from the
+ // BufferQueue, relying on the iterators of the current buffer queue.
+ It = BQ->cbegin();
+ End = BQ->cend();
+ }
+
+ if (CurrentBuffer != nullptr) {
+ deallocateBuffer(CurrentBuffer, SerializedBufferSize);
+ CurrentBuffer = nullptr;
+ }
+
+ if (It == End)
+ return {nullptr, 0};
+
+ // Set up the current buffer to contain the extents like we would when writing
+ // out to disk. The difference here is that we still write "empty" buffers,
+ // or at least go through the iterators faithfully so the handlers see the
+ // empty buffers in the queue.
+ //
+ // We need this atomic fence here to ensure that writes happening to the
+ // buffer have been committed before we load the extents atomically. Because
+ // the buffer is not explicitly synchronised across threads, we rely on the
+ // fence ordering to ensure that writes we expect to have been completed
+ // before the fence are fully committed before we read the extents.
+ atomic_thread_fence(memory_order_acquire);
+ auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
+ SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
+ CurrentBuffer = allocateBuffer(SerializedBufferSize);
+ if (CurrentBuffer == nullptr)
+ return {nullptr, 0};
+
+ // Write out the extents as a Metadata Record into the CurrentBuffer.
+ MetadataRecord ExtentsRecord;
+ ExtentsRecord.Type = uint8_t(RecordType::Metadata);
+ ExtentsRecord.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents);
+ internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
+ auto AfterExtents =
+ static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
+ sizeof(MetadataRecord))) +
+ sizeof(MetadataRecord);
+ internal_memcpy(AfterExtents, It->Data, BufferSize);
+
+ XRayBuffer Result;
+ Result.Data = CurrentBuffer;
+ Result.Size = SerializedBufferSize;
+ ++It;
+ return Result;
+}
+
+// Must finalize before flushing.
+XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
+ if (atomic_load(&LoggingStatus, memory_order_acquire) !=
+ XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+ if (Verbosity())
+ Report("Not flushing log, implementation is not finalized.\n");
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result,
+ XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+ memory_order_release)) {
+ if (Verbosity())
+ Report("Not flushing log, implementation is still finalizing.\n");
+ return static_cast<XRayLogFlushStatus>(Result);
+ }
+
+ if (BQ == nullptr) {
+ if (Verbosity())
+ Report("Cannot flush when global buffer queue is null.\n");
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ // We wait a number of milliseconds to allow threads to see that we've
+ // finalised before attempting to flush the log.
+ SleepForMillis(fdrFlags()->grace_period_ms);
+
+ // At this point, we're going to uninstall the iterator implementation, before
+ // we decide to do anything further with the global buffer queue.
+ __xray_log_remove_buffer_iterator();
+
+ // Once flushed, we should set the global status of the logging implementation
+ // to "uninitialized" to allow for FDR-logging multiple runs.
+ auto ResetToUninitialized = at_scope_exit([] {
+ atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ });
+
+ auto CleanupBuffers = at_scope_exit([] {
+ auto &TLD = getThreadLocalData();
+ if (TLD.Controller != nullptr)
+ TLD.Controller->flush();
+ });
+
+ if (fdrFlags()->no_file_flush) {
+ if (Verbosity())
+ Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
+
+ atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ memory_order_release);
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+ }
+
+ // We write out the file in the following format:
+ //
+ // 1) We write down the XRay file header, with the current FDR log version
+ // and type FDR_LOG.
+ // 2) Then we use the 'apply' member of the BufferQueue that's live, to
+ // ensure that at this point in time we write down the buffers that have
+ // been released (and marked "used") -- we dump the full buffer for now
+ // (fixed-sized) and let the tools reading the buffers deal with the data
+ // afterwards.
+ //
+ LogWriter *LW = LogWriter::Open();
+ if (LW == nullptr) {
+ auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ atomic_store(&LogFlushStatus, Result, memory_order_release);
+ return Result;
+ }
+
+ XRayFileHeader Header = fdrCommonHeaderInfo();
+ Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
+ LW->WriteAll(reinterpret_cast<char *>(&Header),
+ reinterpret_cast<char *>(&Header) + sizeof(Header));
+
+ // Release the current thread's buffer before we attempt to write out all the
+ // buffers. This ensures that in case we had only a single thread going, that
+ // we are able to capture the data nonetheless.
+ auto &TLD = getThreadLocalData();
+ if (TLD.Controller != nullptr)
+ TLD.Controller->flush();
+
+ BQ->apply([&](const BufferQueue::Buffer &B) {
+ // Starting at version 2 of the FDR logging implementation, we only write
+ // the records identified by the extents of the buffer. We use the Extents
+ // from the Buffer and write that out as the first record in the buffer. We
+ // still use a Metadata record, but fill in the extents instead for the
+ // data.
+ MetadataRecord ExtentsRecord;
+ auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
+ DCHECK(BufferExtents <= B.Size);
+ ExtentsRecord.Type = uint8_t(RecordType::Metadata);
+ ExtentsRecord.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents);
+ internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
+ if (BufferExtents > 0) {
+ LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
+ reinterpret_cast<char *>(&ExtentsRecord) +
+ sizeof(MetadataRecord));
+ LW->WriteAll(reinterpret_cast<char *>(B.Data),
+ reinterpret_cast<char *>(B.Data) + BufferExtents);
+ }
+ });
+
+ atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ memory_order_release);
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
+
+XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+ if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_FINALIZING,
+ memory_order_release)) {
+ if (Verbosity())
+ Report("Cannot finalize log, implementation not initialized.\n");
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
+
+ // Do special things to make the log finalize itself, and not allow any more
+ // operations to be performed until re-initialized.
+ if (BQ == nullptr) {
+ if (Verbosity())
+ Report("Attempting to finalize an uninitialized global buffer!\n");
+ } else {
+ BQ->finalize();
+ }
+
+ atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+struct TSCAndCPU {
+ uint64_t TSC = 0;
+ unsigned char CPU = 0;
+};
+
+static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
+ // We want to get the TSC as early as possible, so that we can check whether
+ // we've seen this CPU before. We also do it before we load anything else,
+ // to allow for forward progress with the scheduling.
+ TSCAndCPU Result;
+
+ // Test once for required CPU features
+ static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
+ static bool TSCSupported = true;
+ pthread_once(
+ &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
+
+ if (TSCSupported) {
+ Result.TSC = __xray::readTSC(Result.CPU);
+ } else {
+ // FIXME: This code needs refactoring as it appears in multiple locations
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
+ TS = {0, 0};
+ }
+ Result.CPU = 0;
+ Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+ }
+ return Result;
+}
+
+thread_local atomic_uint8_t Running{0};
+
+static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
+ // Check if we're finalizing, before proceeding.
+ {
+ auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
+ if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
+ Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+ if (TLD.Controller != nullptr) {
+ TLD.Controller->flush();
+ TLD.Controller = nullptr;
+ }
+ return false;
+ }
+ }
+
+ if (UNLIKELY(TLD.Controller == nullptr)) {
+ // Set up the TLD buffer queue.
+ if (UNLIKELY(BQ == nullptr))
+ return false;
+ TLD.BQ = BQ;
+
+ // Check that we have a valid buffer.
+ if (TLD.Buffer.Generation != BQ->generation() &&
+ TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
+ return false;
+
+ // Set up a buffer, before setting up the log writer. Bail out on failure.
+ if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
+ return false;
+
+ // Set up the Log Writer for this thread.
+ if (UNLIKELY(TLD.Writer == nullptr)) {
+ auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
+ new (LWStorage) FDRLogWriter(TLD.Buffer);
+ TLD.Writer = LWStorage;
+ } else {
+ TLD.Writer->resetRecord();
+ }
+
+ auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
+ new (CStorage)
+ FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
+ atomic_load_relaxed(&ThresholdTicks));
+ TLD.Controller = CStorage;
+ }
+
+ DCHECK_NE(TLD.Controller, nullptr);
+ return true;
+}
+
+void fdrLoggingHandleArg0(int32_t FuncId,
+ XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+ auto TC = getTimestamp();
+ auto &TSC = TC.TSC;
+ auto &CPU = TC.CPU;
+ RecursionGuard Guard{Running};
+ if (!Guard)
+ return;
+
+ auto &TLD = getThreadLocalData();
+ if (!setupTLD(TLD))
+ return;
+
+ switch (Entry) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ TLD.Controller->functionEnter(FuncId, TSC, CPU);
+ return;
+ case XRayEntryType::EXIT:
+ TLD.Controller->functionExit(FuncId, TSC, CPU);
+ return;
+ case XRayEntryType::TAIL:
+ TLD.Controller->functionTailExit(FuncId, TSC, CPU);
+ return;
+ case XRayEntryType::CUSTOM_EVENT:
+ case XRayEntryType::TYPED_EVENT:
+ break;
+ }
+}
+
+void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
+ uint64_t Arg) XRAY_NEVER_INSTRUMENT {
+ auto TC = getTimestamp();
+ auto &TSC = TC.TSC;
+ auto &CPU = TC.CPU;
+ RecursionGuard Guard{Running};
+ if (!Guard)
+ return;
+
+ auto &TLD = getThreadLocalData();
+ if (!setupTLD(TLD))
+ return;
+
+ switch (Entry) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
+ return;
+ case XRayEntryType::EXIT:
+ TLD.Controller->functionExit(FuncId, TSC, CPU);
+ return;
+ case XRayEntryType::TAIL:
+ TLD.Controller->functionTailExit(FuncId, TSC, CPU);
+ return;
+ case XRayEntryType::CUSTOM_EVENT:
+ case XRayEntryType::TYPED_EVENT:
+ break;
+ }
+}
+
+void fdrLoggingHandleCustomEvent(void *Event,
+ std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
+ auto TC = getTimestamp();
+ auto &TSC = TC.TSC;
+ auto &CPU = TC.CPU;
+ RecursionGuard Guard{Running};
+ if (!Guard)
+ return;
+
+  // Complain, just once, if we ever receive a custom event larger than what
+  // we can possibly support.
+ if (EventSize >
+ static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
+ static pthread_once_t Once = PTHREAD_ONCE_INIT;
+ pthread_once(
+ &Once, +[] {
+ Report("Custom event size too large; truncating to %d.\n",
+ std::numeric_limits<int32_t>::max());
+ });
+ }
+
+ auto &TLD = getThreadLocalData();
+ if (!setupTLD(TLD))
+ return;
+
+ int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
+ TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
+}
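+
+// Illustrative usage (a sketch, not part of this file): user code typically
+// reaches the handler above through the Clang custom-event builtin, once FDR
+// mode is installed and the sleds are patched:
+//
+//   #include <cstring>
+//   void recordCheckpoint(const char *Tag) {
+//     __xray_customevent(Tag, strlen(Tag)); // forwarded to the handler
+//   }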
+
+void fdrLoggingHandleTypedEvent(
+ uint16_t EventType, const void *Event,
+ std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT {
+ auto TC = getTimestamp();
+ auto &TSC = TC.TSC;
+ auto &CPU = TC.CPU;
+ RecursionGuard Guard{Running};
+ if (!Guard)
+ return;
+
+  // Complain, just once, if we ever receive a typed event larger than what
+  // we can possibly support.
+ if (EventSize >
+ static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
+ static pthread_once_t Once = PTHREAD_ONCE_INIT;
+ pthread_once(
+ &Once, +[] {
+ Report("Typed event size too large; truncating to %d.\n",
+ std::numeric_limits<int32_t>::max());
+ });
+ }
+
+ auto &TLD = getThreadLocalData();
+ if (!setupTLD(TLD))
+ return;
+
+ int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
+ TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize);
+}
+
+XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ if (Options == nullptr)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+ memory_order_release)) {
+ if (Verbosity())
+ Report("Cannot initialize already initialized implementation.\n");
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
+
+ if (Verbosity())
+ Report("Initializing FDR mode with options: %s\n",
+ static_cast<const char *>(Options));
+
+ // TODO: Factor out the flags specific to the FDR mode implementation. For
+ // now, use the global/single definition of the flags, since the FDR mode
+ // flags are already defined there.
+ FlagParser FDRParser;
+ FDRFlags FDRFlags;
+ registerXRayFDRFlags(&FDRParser, &FDRFlags);
+ FDRFlags.setDefaults();
+
+ // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
+ // options until we migrate everyone to use the XRAY_FDR_OPTIONS
+ // compiler-provided options.
+ FDRParser.ParseString(useCompilerDefinedFlags());
+ FDRParser.ParseString(useCompilerDefinedFDRFlags());
+ auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
+ if (EnvOpts == nullptr)
+ EnvOpts = "";
+ FDRParser.ParseString(EnvOpts);
+
+ // FIXME: Remove this when we fully remove the deprecated flags.
+ if (internal_strlen(EnvOpts) == 0) {
+ FDRFlags.func_duration_threshold_us =
+ flags()->xray_fdr_log_func_duration_threshold_us;
+ FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
+ }
+
+ // The provided options should always override the compiler-provided and
+ // environment-variable defined options.
+ FDRParser.ParseString(static_cast<const char *>(Options));
+ *fdrFlags() = FDRFlags;
+ auto BufferSize = FDRFlags.buffer_size;
+ auto BufferMax = FDRFlags.buffer_max;
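+
+  // For illustration (an assumed invocation, not part of this file), the
+  // parse order above means an explicit configuration string always wins:
+  //
+  //   // Shell: XRAY_FDR_OPTIONS="buffer_size=65536 buffer_max=10" ./a.out
+  //   __xray_log_select_mode("xray-fdr");
+  //   __xray_log_init_mode("xray-fdr", "buffer_max=4"); // overrides the env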
+
+ if (BQ == nullptr) {
+ bool Success = false;
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+ new (BQ) BufferQueue(BufferSize, BufferMax, Success);
+ if (!Success) {
+ Report("BufferQueue init failed.\n");
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+ } else {
+ if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
+ if (Verbosity())
+ Report("Failed to re-initialize global buffer queue. Init failed.\n");
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+ }
+
+ static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
+ pthread_once(
+ &OnceInit, +[] {
+ atomic_store(&TicksPerSec,
+ probeRequiredCPUFeatures() ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond,
+ memory_order_release);
+ pthread_key_create(
+ &Key, +[](void *TLDPtr) {
+ if (TLDPtr == nullptr)
+ return;
+ auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
+ if (TLD.BQ == nullptr)
+ return;
+ if (TLD.Buffer.Data == nullptr)
+ return;
+ auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok)
+ Report("At thread exit, failed to release buffer at %p; "
+ "error=%s\n",
+ TLD.Buffer.Data, BufferQueue::getErrorString(EC));
+ });
+ });
+
+ atomic_store(&ThresholdTicks,
+ atomic_load_relaxed(&TicksPerSec) *
+ fdrFlags()->func_duration_threshold_us / 1000000,
+ memory_order_release);
+ // Arg1 handler should go in first to avoid concurrent code accidentally
+  // falling back to arg0 when it should have run arg1.
+ __xray_set_handler_arg1(fdrLoggingHandleArg1);
+ // Install the actual handleArg0 handler after initialising the buffers.
+ __xray_set_handler(fdrLoggingHandleArg0);
+ __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
+ __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
+
+ // Install the buffer iterator implementation.
+ __xray_log_set_buffer_iterator(fdrIterator);
+
+ atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+ memory_order_release);
+
+ if (Verbosity())
+ Report("XRay FDR init successful.\n");
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
+ XRayLogImpl Impl{
+ fdrLoggingInit,
+ fdrLoggingFinalize,
+ fdrLoggingHandleArg0,
+ fdrLoggingFlush,
+ };
+ auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
+ if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
+ Verbosity()) {
+ Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
+ RegistrationResult);
+ return false;
+ }
+
+ if (flags()->xray_fdr_log ||
+ !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
+ auto SelectResult = __xray_log_select_mode("xray-fdr");
+ if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
+ Verbosity()) {
+ Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
+ SelectResult);
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h
new file mode 100644
index 000000000000..6df0057c4965
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h
@@ -0,0 +1,38 @@
+//===-- xray_fdr_logging.h ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOGGING_H
+#define XRAY_XRAY_FDR_LOGGING_H
+
+#include "xray/xray_log_interface.h"
+#include "xray_fdr_log_records.h"
+
+// FDR (Flight Data Recorder) Mode
+// ===============================
+//
+// The XRay whitepaper describes a mode of operation for function call trace
+// logging that involves writing small records into an in-memory circular
+// buffer, which then gets logged to disk on demand. To do this efficiently and
+// capture as much data as we can, we use smaller records compared to the
+// default mode of always writing fixed-size records.
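+//
+// A minimal end-to-end sketch (assumed usage; not prescribed by this header):
+//
+//   #include "xray/xray_log_interface.h"
+//   void startTracing() {
+//     __xray_log_select_mode("xray-fdr");
+//     __xray_log_init_mode("xray-fdr", "buffer_size=16384:buffer_max=10");
+//     __xray_patch(); // start collecting records
+//   }
+//   void stopTracing() {
+//     __xray_log_finalize();  // seal the buffers
+//     __xray_log_flushLog();  // hand them to the registered flush machinery
+//   }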
+
+namespace __xray {
+XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
+ void *Options, size_t OptionsSize);
+XRayLogInitStatus fdrLoggingFinalize();
+void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry);
+void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1);
+XRayLogFlushStatus fdrLoggingFlush();
+XRayLogInitStatus fdrLoggingReset();
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_FDR_LOGGING_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp
new file mode 100644
index 000000000000..e4c6906dc443
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp
@@ -0,0 +1,84 @@
+//===-- xray_flags.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+Flags xray_flags_dont_use_directly; // use via flags().
+
+void Flags::setDefaults() XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+void registerXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) \
+ RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+// This function returns a statically defined list of options that control
+// XRay, supplied through the XRAY_DEFAULT_OPTIONS macro at build time of the
+// XRay runtime. This means users/deployments can override the hard-coded
+// defaults in xray_flags.inc at compile time.
+const char *useCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_DEFAULT_OPTIONS
+ // Do the double-layered string conversion to prevent badly crafted strings
+ // provided through the XRAY_DEFAULT_OPTIONS from causing compilation issues
+ // (or changing the semantics of the implementation through the macro). This
+  // ensures that whatever XRAY_DEFAULT_OPTIONS is defined as gets converted
+  // into a string literal.
+ return SANITIZER_STRINGIFY(XRAY_DEFAULT_OPTIONS);
+#else
+ return "";
+#endif
+}
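+
+// For illustration (an assumed build invocation): a deployment could bake in
+// defaults when compiling the runtime, e.g.
+//
+//   -DXRAY_DEFAULT_OPTIONS=patch_premain=true:verbosity=1
+//
+// which SANITIZER_STRINGIFY turns into the literal
+// "patch_premain=true:verbosity=1" returned above.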
+
+void initializeFlags() XRAY_NEVER_INSTRUMENT {
+ SetCommonFlagsDefaults();
+ auto *F = flags();
+ F->setDefaults();
+
+ FlagParser XRayParser;
+ registerXRayFlags(&XRayParser, F);
+ RegisterCommonFlags(&XRayParser);
+
+ // Use options defaulted at compile-time for the runtime.
+ const char *XRayCompileFlags = useCompilerDefinedFlags();
+ XRayParser.ParseString(XRayCompileFlags);
+
+ // Override from environment variables.
+ XRayParser.ParseStringFromEnv("XRAY_OPTIONS");
+
+ // Override from command line.
+ InitializeCommonFlags();
+
+ if (Verbosity())
+ ReportUnrecognizedFlags();
+
+ if (common_flags()->help) {
+ XRayParser.PrintFlagDescriptions();
+ }
+}
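+
+// For illustration (assumed usage): given the precedence implemented above,
+// running an instrumented binary as
+//
+//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-fdr" ./a.out
+//
+// overrides the compile-time defaults, while any flags not named in the
+// environment keep their baked-in values.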
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h
new file mode 100644
index 000000000000..edb5a5119f86
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h
@@ -0,0 +1,39 @@
+//===-- xray_flags.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_FLAGS_H
+#define XRAY_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+struct Flags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+
+ void setDefaults();
+};
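+
+// For illustration: each XRAY_FLAG(Type, Name, Default, Desc) entry in
+// xray_flags.inc expands to a plain member above, e.g.
+//
+//   bool patch_premain;
+//   const char *xray_logfile_base;
+//
+// so callers read the parsed values directly through flags()->patch_premain.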
+
+extern Flags xray_flags_dont_use_directly;
+extern void registerXRayFlags(FlagParser *P, Flags *F);
+const char *useCompilerDefinedFlags();
+inline Flags *flags() { return &xray_flags_dont_use_directly; }
+
+void initializeFlags();
+
+} // namespace __xray
+
+#endif // XRAY_FLAGS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc
new file mode 100644
index 000000000000..b7dc5a08f242
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc
@@ -0,0 +1,49 @@
+//===-- xray_flags.inc ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(bool, patch_premain, false,
+ "Whether to patch instrumentation points before main.")
+XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
+ "Filename base for the xray logfile.")
+XRAY_FLAG(const char *, xray_mode, "", "Mode to install by default.")
+XRAY_FLAG(uptr, xray_page_size_override, 0,
+ "Override the default page size for the system, in bytes. The size "
+ "should be a power-of-two.")
+
+// Basic (Naive) Mode logging options.
+XRAY_FLAG(bool, xray_naive_log, false,
+ "DEPRECATED: Use xray_mode=xray-basic instead.")
+XRAY_FLAG(int, xray_naive_log_func_duration_threshold_us, 5,
+ "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set "
+ "func_duration_threshold_us instead.")
+XRAY_FLAG(int, xray_naive_log_max_stack_depth, 64,
+ "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set "
+ "max_stack_depth instead.")
+XRAY_FLAG(int, xray_naive_log_thread_buffer_size, 1024,
+ "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set "
+ "thread_buffer_size instead.")
+
+// FDR (Flight Data Recorder) Mode logging options.
+XRAY_FLAG(bool, xray_fdr_log, false,
+ "DEPRECATED: Use xray_mode=xray-fdr instead.")
+XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
+ "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set "
+ "func_duration_threshold_us instead.")
+XRAY_FLAG(int, xray_fdr_log_grace_period_us, 0,
+ "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set "
+ "grace_period_ms instead.")
+XRAY_FLAG(int, xray_fdr_log_grace_period_ms, 100,
+ "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set "
+ "grace_period_ms instead.")
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h
new file mode 100644
index 000000000000..b8c60583761b
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h
@@ -0,0 +1,603 @@
+//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the interface for a function call trie.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FUNCTION_CALL_TRIE_H
+#define XRAY_FUNCTION_CALL_TRIE_H
+
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_profiling_flags.h"
+#include "xray_segmented_array.h"
+#include <limits>
+#include <memory> // For placement new.
+#include <utility>
+
+namespace __xray {
+
+/// A FunctionCallTrie represents the stack traces of XRay instrumented
+/// functions that we've encountered, where a node corresponds to a function
+/// and the path from the root to the node represents its stack trace. Each
+/// node in the trie will contain some useful values, including:
+///
+/// * The cumulative amount of time spent in this particular node/stack.
+/// * The number of times this stack has appeared.
+/// * A histogram of latencies for that particular node.
+///
+/// Each node in the trie will also contain a list of callees, represented
+/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
+/// function ID of the callee, and a pointer to the node.
+///
+/// If we visualise this data structure, we'll find the following potential
+/// representation:
+///
+/// [function id node] -> [callees] [cumulative time]
+/// [call counter] [latency histogram]
+///
+/// As an example, when we have a function in this pseudocode:
+///
+/// func f(N) {
+/// g()
+/// h()
+/// for i := 1..N { j() }
+/// }
+///
+/// We may end up with a trie of the following form:
+///
+/// f -> [ g, h, j ] [...] [1] [...]
+/// g -> [ ... ] [...] [1] [...]
+/// h -> [ ... ] [...] [1] [...]
+/// j -> [ ... ] [...] [N] [...]
+///
+/// If for instance the function g() called j() like so:
+///
+/// func g() {
+/// for i := 1..10 { j() }
+/// }
+///
+/// We'll find the following updated trie:
+///
+/// f -> [ g, h, j ] [...] [1] [...]
+/// g -> [ j' ] [...] [1] [...]
+/// h -> [ ... ] [...] [1] [...]
+/// j -> [ ... ] [...] [N] [...]
+/// j' -> [ ... ] [...] [10] [...]
+///
+/// Note that we'll have a new node representing the path `f -> g -> j'` with
+/// isolated data. This isolation gives us a means of representing the stack
+/// traces as a path, as opposed to a key in a table. The alternative
+/// implementation here would be to use a separate table for the path, and use
+/// hashes of the path as an identifier to accumulate the information. We've
+/// moved away from this approach as it takes a lot of time to compute the hash
+/// every time we need to update a function's call information as we're handling
+/// the entry and exit events.
+///
+/// This approach allows us to maintain a shadow stack, which represents the
+/// currently executing path, and on function exits quickly compute the amount
+/// of time elapsed from the entry, then update the counters for the node
+/// already represented in the trie. This necessitates an efficient
+/// representation of the various data structures (the list of callees must be
+/// cache-aware and efficient to look up, and the histogram must be compact and
+/// quick to update) to enable us to keep the overheads of this implementation
+/// to the minimum.
+class FunctionCallTrie {
+public:
+ struct Node;
+
+ // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
+ // standard library types in this header.
+ struct NodeIdPair {
+ Node *NodePtr;
+ int32_t FId;
+ };
+
+ using NodeIdPairArray = Array<NodeIdPair>;
+ using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
+
+ // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
+ // number of times this node actually appeared, the cumulative amount of time
+ // for this particular node including its children call times, and just the
+ // local time spent on this node. Each Node will have the ID of the XRay
+  // instrumented function that it is associated with.
+ struct Node {
+ Node *Parent;
+ NodeIdPairArray Callees;
+ uint64_t CallCount;
+ uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
+ int32_t FId;
+
+ // TODO: Include the compact histogram.
+ };
+
+private:
+ struct ShadowStackEntry {
+ uint64_t EntryTSC;
+ Node *NodePtr;
+ uint16_t EntryCPU;
+ };
+
+ using NodeArray = Array<Node>;
+ using RootArray = Array<Node *>;
+ using ShadowStackArray = Array<ShadowStackEntry>;
+
+public:
+ // We collate the allocators we need into a single struct, as a convenience to
+ // allow us to initialize these as a group.
+ struct Allocators {
+ using NodeAllocatorType = NodeArray::AllocatorType;
+ using RootAllocatorType = RootArray::AllocatorType;
+ using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
+
+ // Use hosted aligned storage members to allow for trivial move and init.
+    // This also allows us to sidestep the potentially-failing allocation issue.
+ typename std::aligned_storage<sizeof(NodeAllocatorType),
+ alignof(NodeAllocatorType)>::type
+ NodeAllocatorStorage;
+ typename std::aligned_storage<sizeof(RootAllocatorType),
+ alignof(RootAllocatorType)>::type
+ RootAllocatorStorage;
+ typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
+ alignof(ShadowStackAllocatorType)>::type
+ ShadowStackAllocatorStorage;
+ typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
+ alignof(NodeIdPairAllocatorType)>::type
+ NodeIdPairAllocatorStorage;
+
+ NodeAllocatorType *NodeAllocator = nullptr;
+ RootAllocatorType *RootAllocator = nullptr;
+ ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
+ NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+
+ Allocators() = default;
+ Allocators(const Allocators &) = delete;
+ Allocators &operator=(const Allocators &) = delete;
+
+ struct Buffers {
+ BufferQueue::Buffer NodeBuffer;
+ BufferQueue::Buffer RootsBuffer;
+ BufferQueue::Buffer ShadowStackBuffer;
+ BufferQueue::Buffer NodeIdPairBuffer;
+ };
+
+ explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
+ new (&NodeAllocatorStorage)
+ NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+
+ new (&RootAllocatorStorage)
+ RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+
+ new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
+ B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+
+ new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
+ B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ }
+
+ explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
+ new (&NodeAllocatorStorage) NodeAllocatorType(Max);
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+
+ new (&RootAllocatorStorage) RootAllocatorType(Max);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+
+ new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+
+ new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ }
+
+ Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
+ // Here we rely on the safety of memcpy'ing contents of the storage
+ // members, and then pointing the source pointers to nullptr.
+ internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
+ sizeof(NodeAllocatorType));
+ internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
+ sizeof(RootAllocatorType));
+ internal_memcpy(&ShadowStackAllocatorStorage,
+ &O.ShadowStackAllocatorStorage,
+ sizeof(ShadowStackAllocatorType));
+ internal_memcpy(&NodeIdPairAllocatorStorage,
+ &O.NodeIdPairAllocatorStorage,
+ sizeof(NodeIdPairAllocatorType));
+
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+
+ O.NodeAllocator = nullptr;
+ O.RootAllocator = nullptr;
+ O.ShadowStackAllocator = nullptr;
+ O.NodeIdPairAllocator = nullptr;
+ }
+
+ Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
+ // When moving into an existing instance, we ensure that we clean up the
+ // current allocators.
+ if (NodeAllocator)
+ NodeAllocator->~NodeAllocatorType();
+ if (O.NodeAllocator) {
+ new (&NodeAllocatorStorage)
+ NodeAllocatorType(std::move(*O.NodeAllocator));
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+ O.NodeAllocator = nullptr;
+ } else {
+ NodeAllocator = nullptr;
+ }
+
+ if (RootAllocator)
+ RootAllocator->~RootAllocatorType();
+ if (O.RootAllocator) {
+ new (&RootAllocatorStorage)
+ RootAllocatorType(std::move(*O.RootAllocator));
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+ O.RootAllocator = nullptr;
+ } else {
+ RootAllocator = nullptr;
+ }
+
+ if (ShadowStackAllocator)
+ ShadowStackAllocator->~ShadowStackAllocatorType();
+ if (O.ShadowStackAllocator) {
+ new (&ShadowStackAllocatorStorage)
+ ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+ O.ShadowStackAllocator = nullptr;
+ } else {
+ ShadowStackAllocator = nullptr;
+ }
+
+ if (NodeIdPairAllocator)
+ NodeIdPairAllocator->~NodeIdPairAllocatorType();
+ if (O.NodeIdPairAllocator) {
+ new (&NodeIdPairAllocatorStorage)
+ NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ O.NodeIdPairAllocator = nullptr;
+ } else {
+ NodeIdPairAllocator = nullptr;
+ }
+
+ return *this;
+ }
+
+ ~Allocators() XRAY_NEVER_INSTRUMENT {
+ if (NodeAllocator != nullptr)
+ NodeAllocator->~NodeAllocatorType();
+ if (RootAllocator != nullptr)
+ RootAllocator->~RootAllocatorType();
+ if (ShadowStackAllocator != nullptr)
+ ShadowStackAllocator->~ShadowStackAllocatorType();
+ if (NodeIdPairAllocator != nullptr)
+ NodeIdPairAllocator->~NodeIdPairAllocatorType();
+ }
+ };
+
+ static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
+ return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
+ }
+
+ static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
+ Allocators A(Max);
+ return A;
+ }
+
+ static Allocators
+ InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
+ Allocators A(Bufs);
+ return A;
+ }
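+
+  // Illustrative lifecycle (a sketch of the expected call pattern, not a
+  // definitive recipe); the Allocators object must outlive the trie built
+  // over it:
+  //
+  //   auto Allocators = FunctionCallTrie::InitAllocators();
+  //   FunctionCallTrie Trie(Allocators);
+  //   Trie.enterFunction(/*FId=*/1, /*TSC=*/100, /*CPU=*/0);
+  //   Trie.exitFunction(/*FId=*/1, /*TSC=*/200, /*CPU=*/0);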
+
+private:
+ NodeArray Nodes;
+ RootArray Roots;
+ ShadowStackArray ShadowStack;
+ NodeIdPairAllocatorType *NodeIdPairAllocator;
+ uint32_t OverflowedFunctions;
+
+public:
+ explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
+ : Nodes(*A.NodeAllocator),
+ Roots(*A.RootAllocator),
+ ShadowStack(*A.ShadowStackAllocator),
+ NodeIdPairAllocator(A.NodeIdPairAllocator),
+ OverflowedFunctions(0) {}
+
+ FunctionCallTrie() = delete;
+ FunctionCallTrie(const FunctionCallTrie &) = delete;
+ FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
+
+ FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
+ : Nodes(std::move(O.Nodes)),
+ Roots(std::move(O.Roots)),
+ ShadowStack(std::move(O.ShadowStack)),
+ NodeIdPairAllocator(O.NodeIdPairAllocator),
+ OverflowedFunctions(O.OverflowedFunctions) {}
+
+ FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
+ Nodes = std::move(O.Nodes);
+ Roots = std::move(O.Roots);
+ ShadowStack = std::move(O.ShadowStack);
+ NodeIdPairAllocator = O.NodeIdPairAllocator;
+ OverflowedFunctions = O.OverflowedFunctions;
+ return *this;
+ }
+
+ ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
+
+ void enterFunction(const int32_t FId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(FId, 0);
+
+    // If we've already overflowed the function call stack, do not bother
+ // attempting to record any more function entries.
+ if (UNLIKELY(OverflowedFunctions)) {
+ ++OverflowedFunctions;
+ return;
+ }
+
+ // If this is the first function we've encountered, we want to set up the
+ // node(s) and treat it as a root.
+ if (UNLIKELY(ShadowStack.empty())) {
+ auto *NewRoot = Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
+ if (UNLIKELY(NewRoot == nullptr))
+ return;
+ if (Roots.AppendEmplace(NewRoot) == nullptr) {
+ Nodes.trim(1);
+ return;
+ }
+ if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
+ Nodes.trim(1);
+ Roots.trim(1);
+ ++OverflowedFunctions;
+ return;
+ }
+ return;
+ }
+
+ // From this point on, we require that the stack is not empty.
+ DCHECK(!ShadowStack.empty());
+ auto TopNode = ShadowStack.back().NodePtr;
+ DCHECK_NE(TopNode, nullptr);
+
+    // If we've seen this callee before, we access that node and place it on
+    // top of the stack.
+    auto *Callee = TopNode->Callees.find_element(
+        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
+ if (Callee != nullptr) {
+ CHECK_NE(Callee->NodePtr, nullptr);
+ if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
+ ++OverflowedFunctions;
+ return;
+ }
+
+ // This means we've never seen this stack before, create a new node here.
+    auto *NewNode = Nodes.AppendEmplace(
+        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
+ if (UNLIKELY(NewNode == nullptr))
+ return;
+ DCHECK_NE(NewNode, nullptr);
+ TopNode->Callees.AppendEmplace(NewNode, FId);
+ if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
+ ++OverflowedFunctions;
+ return;
+ }
+
+ void exitFunction(int32_t FId, uint64_t TSC,
+ uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ // If we're exiting functions that have "overflowed" or don't fit into the
+ // stack due to allocator constraints, we then decrement that count first.
+ if (OverflowedFunctions) {
+ --OverflowedFunctions;
+ return;
+ }
+
+ // When we exit a function, we look up the ShadowStack to see whether we've
+ // entered this function before. We do as little processing here as we can,
+ // since most of the hard work would have already been done at function
+ // entry.
+ uint64_t CumulativeTreeTime = 0;
+
+ while (!ShadowStack.empty()) {
+ const auto &Top = ShadowStack.back();
+ auto TopNode = Top.NodePtr;
+ DCHECK_NE(TopNode, nullptr);
+
+      // The TSC we're provided may have wrapped around, ending up less than
+      // the TSC we recorded when we entered the function.
+      //
+      // To get an accurate cycle count, we check whether we've wrapped
+      // (TSC < Top.EntryTSC) and, if so, account for the difference between
+      // the entry TSC and the maximum value of the counter (max of uint64_t),
+      // then add the current TSC. The delta always fits in 64 bits: for
+      // instance, an entry TSC of 1 and a current TSC of 0 yields
+      // (numeric_limits<uint64_t>::max() - 1) + 0.
+ //
+ // NOTE: This assumes that TSCs are synchronised across CPUs.
+ // TODO: Count the number of times we've seen CPU migrations.
+ uint64_t LocalTime =
+ Top.EntryTSC > TSC
+ ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
+ : TSC - Top.EntryTSC;
+ TopNode->CallCount++;
+ TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
+ CumulativeTreeTime += LocalTime;
+ ShadowStack.trim(1);
+
+ // TODO: Update the histogram for the node.
+ if (TopNode->FId == FId)
+ break;
+ }
+ }
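+
+  // Example of the unwinding above (a sketch, given a FunctionCallTrie
+  // `Trie`): with entries f -> g -> h on the shadow stack, an exit event for
+  // f pops h and g as well, crediting each node its local time, so missing
+  // exit events cannot leak stack frames:
+  //
+  //   Trie.enterFunction(1, 100, 0); // f
+  //   Trie.enterFunction(2, 120, 0); // g
+  //   Trie.enterFunction(3, 130, 0); // h
+  //   Trie.exitFunction(1, 200, 0);  // pops h, then g, then f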
+
+ const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }
+
+ // The deepCopyInto operation will update the provided FunctionCallTrie by
+ // re-creating the contents of this particular FunctionCallTrie in the other
+ // FunctionCallTrie. It will do this using a Depth First Traversal from the
+ // roots, and while doing so recreating the traversal in the provided
+ // FunctionCallTrie.
+ //
+ // This operation will *not* destroy the state in `O`, and thus may cause some
+ // duplicate entries in `O` if it is not empty.
+ //
+ // This function is *not* thread-safe, and may require external
+ // synchronisation of both "this" and |O|.
+ //
+ // This function must *not* be called with a non-empty FunctionCallTrie |O|.
+ void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
+ DCHECK(O.getRoots().empty());
+
+ // We then push the root into a stack, to use as the parent marker for new
+ // nodes we push in as we're traversing depth-first down the call tree.
+ struct NodeAndParent {
+ FunctionCallTrie::Node *Node;
+ FunctionCallTrie::Node *NewNode;
+ };
+ using Stack = Array<NodeAndParent>;
+
+ typename Stack::AllocatorType StackAllocator(
+ profilingFlags()->stack_allocator_max);
+ Stack DFSStack(StackAllocator);
+
+ for (const auto Root : getRoots()) {
+ // Add a node in O for this root.
+ auto NewRoot = O.Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
+ Root->CumulativeLocalTime, Root->FId);
+
+ // Because we cannot allocate more memory we should bail out right away.
+ if (UNLIKELY(NewRoot == nullptr))
+ return;
+
+ if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
+ return;
+
+ // TODO: Figure out what to do if we fail to allocate any more stack
+ // space. Maybe warn or report once?
+ if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
+ return;
+ while (!DFSStack.empty()) {
+ NodeAndParent NP = DFSStack.back();
+ DCHECK_NE(NP.Node, nullptr);
+ DCHECK_NE(NP.NewNode, nullptr);
+ DFSStack.trim(1);
+ for (const auto Callee : NP.Node->Callees) {
+ auto NewNode = O.Nodes.AppendEmplace(
+ NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
+ Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
+ Callee.FId);
+ if (UNLIKELY(NewNode == nullptr))
+ return;
+ if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
+ nullptr))
+ return;
+ if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
+ nullptr))
+ return;
+ }
+ }
+ }
+ }
+
+ // The mergeInto operation will update the provided FunctionCallTrie by
+ // traversing the current trie's roots and updating (i.e. merging) the data in
+ // the nodes with the data in the target's nodes. If the node doesn't exist in
+ // the provided trie, we add a new one in the right position, and inherit the
+ // data from the original (current) trie, along with all its callees.
+ //
+ // This function is *not* thread-safe, and may require external
+ // synchronisation of both "this" and |O|.
+ void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
+ struct NodeAndTarget {
+ FunctionCallTrie::Node *OrigNode;
+ FunctionCallTrie::Node *TargetNode;
+ };
+ using Stack = Array<NodeAndTarget>;
+ typename Stack::AllocatorType StackAllocator(
+ profilingFlags()->stack_allocator_max);
+ Stack DFSStack(StackAllocator);
+
+ for (const auto Root : getRoots()) {
+ Node *TargetRoot = nullptr;
+ auto R = O.Roots.find_element(
+ [&](const Node *Node) { return Node->FId == Root->FId; });
+ if (R == nullptr) {
+ TargetRoot = O.Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
+ Root->FId);
+ if (UNLIKELY(TargetRoot == nullptr))
+ return;
+
+ O.Roots.Append(TargetRoot);
+ } else {
+ TargetRoot = *R;
+ }
+
+ DFSStack.AppendEmplace(Root, TargetRoot);
+ while (!DFSStack.empty()) {
+ NodeAndTarget NT = DFSStack.back();
+ DCHECK_NE(NT.OrigNode, nullptr);
+ DCHECK_NE(NT.TargetNode, nullptr);
+ DFSStack.trim(1);
+ // TODO: Update the histogram as well when we have it ready.
+ NT.TargetNode->CallCount += NT.OrigNode->CallCount;
+ NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
+ for (const auto Callee : NT.OrigNode->Callees) {
+ auto TargetCallee = NT.TargetNode->Callees.find_element(
+ [&](const FunctionCallTrie::NodeIdPair &C) {
+ return C.FId == Callee.FId;
+ });
+ if (TargetCallee == nullptr) {
+ auto NewTargetNode = O.Nodes.AppendEmplace(
+ NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
+ Callee.FId);
+
+ if (UNLIKELY(NewTargetNode == nullptr))
+ return;
+
+ TargetCallee =
+ NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
+ }
+ DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
+ }
+ }
+ }
+ }
+};
+
+} // namespace __xray
+
+#endif // XRAY_FUNCTION_CALL_TRIE_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp
new file mode 100644
index 000000000000..00ba5fe4a52b
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp
@@ -0,0 +1,131 @@
+//===-- xray_init.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic.
+//===----------------------------------------------------------------------===//
+
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+extern "C" {
+void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
+
+#if SANITIZER_MAC
+// HACK: This is a temporary workaround to make XRay build on
+// Darwin, but it will probably not work at runtime.
+const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+using namespace __xray;
+
+// When set to 'true' this means the XRay runtime has been initialised. We use
+// the weak symbols defined above (__start_xray_instr_map and
+// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
+// for runtime patching/unpatching of instrumentation points.
+//
+// FIXME: Support DSO instrumentation maps too. The current solution only works
+// for statically linked executables.
+atomic_uint8_t XRayInitialized{0};
+
+// This should always be updated before XRayInitialized is updated.
+SpinMutex XRayInstrMapMutex;
+XRaySledMap XRayInstrMap;
+
+// Global flag to determine whether the flags have been initialized.
+atomic_uint8_t XRayFlagsInitialized{0};
+
+// A mutex to allow only one thread to initialize the XRay data structures.
+SpinMutex XRayInitMutex;
+
+// __xray_init() will do the actual loading of the current process' memory map
+// and then proceed to look for the .xray_instr_map section/segment.
+void __xray_init() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayInitMutex);
+ // Short-circuit if we've already initialized XRay before.
+ if (atomic_load(&XRayInitialized, memory_order_acquire))
+ return;
+
+ // XRAY is not compatible with PaX MPROTECT
+ CheckMPROTECT();
+
+ if (!atomic_load(&XRayFlagsInitialized, memory_order_acquire)) {
+ initializeFlags();
+ atomic_store(&XRayFlagsInitialized, true, memory_order_release);
+ }
+
+ if (__start_xray_instr_map == nullptr) {
+ if (Verbosity())
+ Report("XRay instrumentation map missing. Not initializing XRay.\n");
+ return;
+ }
+
+ {
+ SpinMutexLock Guard(&XRayInstrMapMutex);
+ XRayInstrMap.Sleds = __start_xray_instr_map;
+ XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+ if (__start_xray_fn_idx != nullptr) {
+ XRayInstrMap.SledsIndex = __start_xray_fn_idx;
+ XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
+ } else {
+ size_t CountFunctions = 0;
+ uint64_t LastFnAddr = 0;
+
+ for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) {
+ const auto &Sled = XRayInstrMap.Sleds[I];
+ const auto Function = Sled.function();
+ if (Function != LastFnAddr) {
+ CountFunctions++;
+ LastFnAddr = Function;
+ }
+ }
+
+ XRayInstrMap.Functions = CountFunctions;
+ }
+ }
+ atomic_store(&XRayInitialized, true, memory_order_release);
+
+#ifndef XRAY_NO_PREINIT
+ if (flags()->patch_premain)
+ __xray_patch();
+#endif
+}
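+
+// For illustration (assumed usage): builds that define XRAY_NO_PREINIT, or
+// platforms without .preinit_array support, may invoke the entry point
+// manually; the call is idempotent thanks to the guard above.
+//
+//   extern "C" void __xray_init();
+//   int main() {
+//     __xray_init();  // safe to call more than once
+//     __xray_patch(); // patch sleds now that the instrumentation map is loaded
+//   }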
+
+// FIXME: Make check-xray tests work on FreeBSD without
+// SANITIZER_CAN_USE_PREINIT_ARRAY.
+// See sanitizer_internal_defs.h where the macro is defined.
+// Calling unresolved PLT functions in .preinit_array can lead to deadlock on
+// FreeBSD but here it seems benign.
+#if !defined(XRAY_NO_PREINIT) && \
+ (SANITIZER_CAN_USE_PREINIT_ARRAY || SANITIZER_FREEBSD)
+// Only add the preinit array initialization if the sanitizers can.
+__attribute__((section(".preinit_array"),
+ used)) void (*__local_xray_preinit)(void) = __xray_init;
+#else
+// If we cannot use the .preinit_array section, we should instead use dynamic
+// initialisation.
+__attribute__((constructor(0))) static void __local_xray_dyninit() {
+ __xray_init();
+}
+#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp
new file mode 100644
index 000000000000..7669b9ab82be
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp
@@ -0,0 +1,518 @@
+//===-- xray_interface.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_interface_internal.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <errno.h>
+#include <limits>
+#include <string.h>
+#include <sys/mman.h>
+
+#if SANITIZER_FUCHSIA
+#include <zircon/process.h>
+#include <zircon/sanitizer.h>
+#include <zircon/status.h>
+#include <zircon/syscalls.h>
+#endif
+
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+#include "xray_defs.h"
+#include "xray_flags.h"
+
+extern __sanitizer::SpinMutex XRayInstrMapMutex;
+extern __sanitizer::atomic_uint8_t XRayInitialized;
+extern __xray::XRaySledMap XRayInstrMap;
+
+namespace __xray {
+
+#if defined(__x86_64__)
+static const int16_t cSledLength = 12;
+#elif defined(__aarch64__)
+static const int16_t cSledLength = 32;
+#elif defined(__arm__)
+static const int16_t cSledLength = 28;
+#elif SANITIZER_MIPS32
+static const int16_t cSledLength = 48;
+#elif SANITIZER_MIPS64
+static const int16_t cSledLength = 64;
+#elif defined(__powerpc64__)
+static const int16_t cSledLength = 8;
+#else
+#error "Unsupported CPU Architecture"
+#endif /* CPU architecture */
+
+// This is the function to call when we encounter the entry or exit sleds.
+atomic_uintptr_t XRayPatchedFunction{0};
+
+// This is the function to call from the arg1-enabled sleds/trampolines.
+atomic_uintptr_t XRayArgLogger{0};
+
+// This is the function to call when we encounter a custom event log call.
+atomic_uintptr_t XRayPatchedCustomEvent{0};
+
+// This is the function to call when we encounter a typed event log call.
+atomic_uintptr_t XRayPatchedTypedEvent{0};
+
+// This is the global status to determine whether we are currently
+// patching/unpatching.
+atomic_uint8_t XRayPatching{0};
+
+struct TypeDescription {
+ uint32_t type_id;
+ std::size_t description_string_length;
+};
+
+using TypeDescriptorMapType = AddrHashMap<TypeDescription, 11>;
+// An address map from immutable descriptors to type ids.
+TypeDescriptorMapType TypeDescriptorAddressMap{};
+
+atomic_uint32_t TypeEventDescriptorCounter{0};
+
+// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will
+// undo any successful mprotect(...) changes. This is used to make a page
+// writeable and executable, and upon destruction if it was successful in
+// doing so returns the page into a read-only and executable page.
+//
+// This is only used specifically for runtime-patching of the XRay
+// instrumentation points. This assumes that the executable pages are
+// originally read-and-execute only.
+class MProtectHelper {
+ void *PageAlignedAddr;
+ std::size_t MProtectLen;
+ bool MustCleanup;
+
+public:
+ explicit MProtectHelper(void *PageAlignedAddr,
+ std::size_t MProtectLen,
+ std::size_t PageSize) XRAY_NEVER_INSTRUMENT
+ : PageAlignedAddr(PageAlignedAddr),
+ MProtectLen(MProtectLen),
+ MustCleanup(false) {
+#if SANITIZER_FUCHSIA
+ MProtectLen = RoundUpTo(MProtectLen, PageSize);
+#endif
+ }
+
+ int MakeWriteable() XRAY_NEVER_INSTRUMENT {
+#if SANITIZER_FUCHSIA
+ auto R = __sanitizer_change_code_protection(
+ reinterpret_cast<uintptr_t>(PageAlignedAddr), MProtectLen, true);
+ if (R != ZX_OK) {
+ Report("XRay: cannot change code protection: %s\n",
+ _zx_status_get_string(R));
+ return -1;
+ }
+ MustCleanup = true;
+ return 0;
+#else
+ auto R = mprotect(PageAlignedAddr, MProtectLen,
+ PROT_READ | PROT_WRITE | PROT_EXEC);
+ if (R != -1)
+ MustCleanup = true;
+ return R;
+#endif
+ }
+
+ ~MProtectHelper() XRAY_NEVER_INSTRUMENT {
+ if (MustCleanup) {
+#if SANITIZER_FUCHSIA
+ auto R = __sanitizer_change_code_protection(
+ reinterpret_cast<uintptr_t>(PageAlignedAddr), MProtectLen, false);
+ if (R != ZX_OK) {
+ Report("XRay: cannot change code protection: %s\n",
+ _zx_status_get_string(R));
+ }
+#else
+ mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC);
+#endif
+ }
+ }
+};
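+
+// Illustrative use of the RAII contract above (a sketch mirroring the
+// patching routines below):
+//
+//   MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+//   if (Protector.MakeWriteable() == -1)
+//     return XRayPatchingStatus::FAILED;
+//   // ... rewrite sleds within the now-writeable range ...
+//   // Destructor restores a read-and-execute mapping on scope exit.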
+
+namespace {
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable,
+ int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ bool Success = false;
+ switch (Sled.Kind) {
+ case XRayEntryType::ENTRY:
+ Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+ break;
+ case XRayEntryType::EXIT:
+ Success = patchFunctionExit(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::TAIL:
+ Success = patchFunctionTailExit(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+ break;
+ case XRayEntryType::CUSTOM_EVENT:
+ Success = patchCustomEvent(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::TYPED_EVENT:
+ Success = patchTypedEvent(Enable, FuncId, Sled);
+ break;
+ default:
+ Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind));
+ return false;
+ }
+ return Success;
+}
+
+const XRayFunctionSledIndex
+findFunctionSleds(int32_t FuncId,
+ const XRaySledMap &InstrMap) XRAY_NEVER_INSTRUMENT {
+ int32_t CurFn = 0;
+ uint64_t LastFnAddr = 0;
+ XRayFunctionSledIndex Index = {nullptr, nullptr};
+
+ for (std::size_t I = 0; I < InstrMap.Entries && CurFn <= FuncId; I++) {
+ const auto &Sled = InstrMap.Sleds[I];
+ const auto Function = Sled.function();
+ if (Function != LastFnAddr) {
+ CurFn++;
+ LastFnAddr = Function;
+ }
+
+ if (CurFn == FuncId) {
+ if (Index.Begin == nullptr)
+ Index.Begin = &Sled;
+ Index.End = &Sled;
+ }
+ }
+
+ Index.End += 1;
+
+ return Index;
+}
+
+XRayPatchingStatus patchFunction(int32_t FuncId,
+ bool Enable) XRAY_NEVER_INSTRUMENT {
+ if (!atomic_load(&XRayInitialized,
+ memory_order_acquire))
+ return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+ uint8_t NotPatching = false;
+ if (!atomic_compare_exchange_strong(
+ &XRayPatching, &NotPatching, true, memory_order_acq_rel))
+ return XRayPatchingStatus::ONGOING; // Already patching.
+
+ // Next, we look for the function index.
+ XRaySledMap InstrMap;
+ {
+ SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
+
+ // If we don't have an index, we can't patch individual functions.
+ if (InstrMap.Functions == 0)
+ return XRayPatchingStatus::NOT_INITIALIZED;
+
+ // FuncId must be a positive number, less than the number of functions
+ // instrumented.
+ if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+ Report("Invalid function id provided: %d\n", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+  // Now we patch the sleds for this specific function.
+ auto SledRange = InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1]
+ : findFunctionSleds(FuncId, InstrMap);
+ auto *f = SledRange.Begin;
+ auto *e = SledRange.End;
+ bool SucceedOnce = false;
+ while (f != e)
+ SucceedOnce |= patchSled(*f++, Enable, FuncId);
+
+ atomic_store(&XRayPatching, false,
+ memory_order_release);
+
+ if (!SucceedOnce) {
+ Report("Failed patching any sled for function '%d'.", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ return XRayPatchingStatus::SUCCESS;
+}
+
+// controlPatching implements the common internals of the patching/unpatching
+// implementation. |Enable| defines whether we're enabling or disabling the
+// runtime XRay instrumentation.
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+ if (!atomic_load(&XRayInitialized,
+ memory_order_acquire))
+ return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+ uint8_t NotPatching = false;
+ if (!atomic_compare_exchange_strong(
+ &XRayPatching, &NotPatching, true, memory_order_acq_rel))
+ return XRayPatchingStatus::ONGOING; // Already patching.
+
+ uint8_t PatchingSuccess = false;
+ auto XRayPatchingStatusResetter =
+ at_scope_exit([&PatchingSuccess] {
+ if (!PatchingSuccess)
+ atomic_store(&XRayPatching, false,
+ memory_order_release);
+ });
+
+ XRaySledMap InstrMap;
+ {
+ SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
+ if (InstrMap.Entries == 0)
+ return XRayPatchingStatus::NOT_INITIALIZED;
+
+ uint32_t FuncId = 1;
+ uint64_t CurFun = 0;
+
+ // First we want to find the bounds for which we have instrumentation points,
+ // and try to get as few calls to mprotect(...) as possible. We're assuming
+ // that all the sleds for the instrumentation map are contiguous as a single
+ // set of pages. When we do support dynamic shared object instrumentation,
+ // we'll need to do this for each set of page load offsets per DSO loaded. For
+ // now we're assuming we can mprotect the whole section of text between the
+ // minimum sled address and the maximum sled address (+ the largest sled
+ // size).
+ auto *MinSled = &InstrMap.Sleds[0];
+ auto *MaxSled = &InstrMap.Sleds[InstrMap.Entries - 1];
+ for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+ const auto &Sled = InstrMap.Sleds[I];
+ if (Sled.address() < MinSled->address())
+ MinSled = &Sled;
+ if (Sled.address() > MaxSled->address())
+ MaxSled = &Sled;
+ }
+
+ const size_t PageSize = flags()->xray_page_size_override > 0
+ ? flags()->xray_page_size_override
+ : GetPageSizeCached();
+ if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+ Report("System page size is not a power of two: %lld\n", PageSize);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ void *PageAlignedAddr =
+ reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1));
+ size_t MProtectLen =
+ (MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) +
+ cSledLength;
+ MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+ if (Protector.MakeWriteable() == -1) {
+ Report("Failed mprotect: %d\n", errno);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ for (std::size_t I = 0; I < InstrMap.Entries; ++I) {
+ auto &Sled = InstrMap.Sleds[I];
+ auto F = Sled.function();
+ if (CurFun == 0)
+ CurFun = F;
+ if (F != CurFun) {
+ ++FuncId;
+ CurFun = F;
+ }
+ patchSled(Sled, Enable, FuncId);
+ }
+ atomic_store(&XRayPatching, false,
+ memory_order_release);
+ PatchingSuccess = true;
+ return XRayPatchingStatus::SUCCESS;
+}
+
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+ bool Enable) XRAY_NEVER_INSTRUMENT {
+ XRaySledMap InstrMap;
+ {
+ SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
+
+ // FuncId must be a positive number, less than the number of functions
+ // instrumented.
+ if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+ Report("Invalid function id provided: %d\n", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ const size_t PageSize = flags()->xray_page_size_override > 0
+ ? flags()->xray_page_size_override
+ : GetPageSizeCached();
+ if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+ Report("Provided page size is not a power of two: %lld\n", PageSize);
+ return XRayPatchingStatus::FAILED;
+ }
+
+  // Here we compute the minimum sled and maximum sled associated with a
+ // particular function ID.
+ auto SledRange = InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1]
+ : findFunctionSleds(FuncId, InstrMap);
+ auto *f = SledRange.Begin;
+ auto *e = SledRange.End;
+ auto *MinSled = f;
+ auto *MaxSled = (SledRange.End - 1);
+ while (f != e) {
+ if (f->address() < MinSled->address())
+ MinSled = f;
+ if (f->address() > MaxSled->address())
+ MaxSled = f;
+ ++f;
+ }
+
+ void *PageAlignedAddr =
+ reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1));
+ size_t MProtectLen =
+ (MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) +
+ cSledLength;
+ MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+ if (Protector.MakeWriteable() == -1) {
+ Report("Failed mprotect: %d\n", errno);
+ return XRayPatchingStatus::FAILED;
+ }
+ return patchFunction(FuncId, Enable);
+}
+
+} // namespace
+
+} // namespace __xray
+
+using namespace __xray;
+
+// The following functions are declared `extern "C" {...}` in the header, hence
+// they're defined in the global namespace.
+
+int __xray_set_handler(void (*entry)(int32_t,
+ XRayEntryType)) XRAY_NEVER_INSTRUMENT {
+ if (atomic_load(&XRayInitialized,
+ memory_order_acquire)) {
+
+ atomic_store(&__xray::XRayPatchedFunction,
+ reinterpret_cast<uintptr_t>(entry),
+ memory_order_release);
+ return 1;
+ }
+ return 0;
+}
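+
+// Illustrative installation (a sketch): the handler runs on every
+// instrumented entry/exit once sleds are patched, so it should be cheap and
+// reentrancy-safe.
+//
+//   void MyHandler(int32_t FuncId, XRayEntryType Type) {
+//     // record FuncId/Type somewhere cheap and lock-free
+//   }
+//   // ... after the XRay runtime has initialized:
+//   __xray_set_handler(MyHandler);
+//   __xray_patch();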
+
+int __xray_set_customevent_handler(void (*entry)(void *, size_t))
+ XRAY_NEVER_INSTRUMENT {
+ if (atomic_load(&XRayInitialized,
+ memory_order_acquire)) {
+ atomic_store(&__xray::XRayPatchedCustomEvent,
+ reinterpret_cast<uintptr_t>(entry),
+ memory_order_release);
+ return 1;
+ }
+ return 0;
+}
+
+int __xray_set_typedevent_handler(void (*entry)(
+ uint16_t, const void *, size_t)) XRAY_NEVER_INSTRUMENT {
+ if (atomic_load(&XRayInitialized,
+ memory_order_acquire)) {
+ atomic_store(&__xray::XRayPatchedTypedEvent,
+ reinterpret_cast<uintptr_t>(entry),
+ memory_order_release);
+ return 1;
+ }
+ return 0;
+}
+
+int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_handler(nullptr);
+}
+
+int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_customevent_handler(nullptr);
+}
+
+int __xray_remove_typedevent_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_typedevent_handler(nullptr);
+}
+
+uint16_t __xray_register_event_type(
+ const char *const event_type) XRAY_NEVER_INSTRUMENT {
+ TypeDescriptorMapType::Handle h(&TypeDescriptorAddressMap, (uptr)event_type);
+ if (h.created()) {
+ h->type_id = atomic_fetch_add(
+ &TypeEventDescriptorCounter, 1, memory_order_acq_rel);
+ h->description_string_length = strnlen(event_type, 1024);
+ }
+ return h->type_id;
+}
+
+XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
+ return controlPatching(true);
+}
+
+XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
+ return controlPatching(false);
+}
+
+XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ return mprotectAndPatchFunction(FuncId, true);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ return mprotectAndPatchFunction(FuncId, false);
+}
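+// A hedged usage sketch: selectively instrument a single function by id. The
+// id 42 below is illustrative only.
+//
+//   if (__xray_patch_function(42) == XRayPatchingStatus::SUCCESS) {
+//     // ... run the workload of interest ...
+//     __xray_unpatch_function(42);
+//   }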
+
+int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
+ if (!atomic_load(&XRayInitialized,
+ memory_order_acquire))
+ return 0;
+
+  // A relaxed write might not be visible if the current thread gets
+  // scheduled on a different CPU/NUMA node. We publish the handler with
+  // release ordering so that every thread observes it, keeping the collected
+  // data consistent across CPUs.
+ atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
+ memory_order_release);
+ return 1;
+}
+
+int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
+
+uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ XRaySledMap InstrMap;
+ {
+ SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
+
+ if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions)
+ return 0;
+ const XRaySledEntry *Sled = InstrMap.SledsIndex
+ ? InstrMap.SledsIndex[FuncId - 1].Begin
+ : findFunctionSleds(FuncId, InstrMap).Begin;
+ return Sled->function()
+// On PPC, function entries are always aligned to 16 bytes. The beginning of a
+// sled might be a local entry, which is always 8 bytes past the global entry.
+// Always return the global entry.
+#ifdef __PPC__
+ & ~0xf
+#endif
+ ;
+}
+
+size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayInstrMapMutex);
+ return XRayInstrMap.Functions;
+}
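+// Illustrative sketch (not part of the runtime): enumerating every
+// instrumented function. Function ids are 1-based, per the checks above.
+//
+//   for (size_t I = 1; I <= __xray_max_function_id(); ++I)
+//     if (uintptr_t Addr = __xray_function_address(I))
+//       Report("function %zu at %p\n", I, (void *)Addr);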
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h
new file mode 100644
index 000000000000..390f389b1dca
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h
@@ -0,0 +1,104 @@
+//===-- xray_interface_internal.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions. See also include/xray/xray_interface.h.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_INTERFACE_INTERNAL_H
+#define XRAY_INTERFACE_INTERNAL_H
+
+#include "sanitizer_common/sanitizer_platform.h"
+#include "xray/xray_interface.h"
+#include <cstddef>
+#include <cstdint>
+
+extern "C" {
+
+struct XRaySledEntry {
+#if SANITIZER_WORDSIZE == 64
+ uint64_t Address;
+ uint64_t Function;
+ unsigned char Kind;
+ unsigned char AlwaysInstrument;
+ unsigned char Version;
+ unsigned char Padding[13]; // Need 32 bytes
+ uint64_t function() const {
+ if (Version < 2)
+ return Function;
+ // The target address is relative to the location of the Function variable.
+ return reinterpret_cast<uint64_t>(&Function) + Function;
+ }
+ uint64_t address() const {
+ if (Version < 2)
+ return Address;
+ // The target address is relative to the location of the Address variable.
+ return reinterpret_cast<uint64_t>(&Address) + Address;
+ }
+#elif SANITIZER_WORDSIZE == 32
+ uint32_t Address;
+ uint32_t Function;
+ unsigned char Kind;
+ unsigned char AlwaysInstrument;
+ unsigned char Version;
+ unsigned char Padding[5]; // Need 16 bytes
+ uint32_t function() const {
+ if (Version < 2)
+ return Function;
+ // The target address is relative to the location of the Function variable.
+ return reinterpret_cast<uint32_t>(&Function) + Function;
+ }
+ uint32_t address() const {
+ if (Version < 2)
+ return Address;
+ // The target address is relative to the location of the Address variable.
+ return reinterpret_cast<uint32_t>(&Address) + Address;
+ }
+#else
+#error "Unsupported word size."
+#endif
+};
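+// Worked example with hypothetical values: for a version >= 2 sled whose
+// Address field lives at 0x1000 and stores the value 0x80, address() returns
+// 0x1000 + 0x80 == 0x1080. Entries are PC-relative, which keeps the
+// instrumentation map position independent.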
+
+struct XRayFunctionSledIndex {
+ const XRaySledEntry *Begin;
+ const XRaySledEntry *End;
+};
+}
+
+namespace __xray {
+
+struct XRaySledMap {
+ const XRaySledEntry *Sleds;
+ size_t Entries;
+ const XRayFunctionSledIndex *SledsIndex;
+ size_t Functions;
+};
+
+bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+ void (*Trampoline)());
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled);
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+
+} // namespace __xray
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
+extern void __xray_TypedEvent();
+}
+
+#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp
new file mode 100644
index 000000000000..fc70373f9dac
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp
@@ -0,0 +1,209 @@
+//===-- xray_log_interface.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray/xray_log_interface.h"
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "xray/xray_interface.h"
+#include "xray_defs.h"
+
+namespace __xray {
+static SpinMutex XRayImplMutex;
+static XRayLogImpl CurrentXRayImpl{nullptr, nullptr, nullptr, nullptr};
+static XRayLogImpl *GlobalXRayImpl = nullptr;
+
+// This is the default implementation of a buffer iterator, which always yields
+// a null buffer.
+XRayBuffer NullBufferIterator(XRayBuffer) XRAY_NEVER_INSTRUMENT {
+ return {nullptr, 0};
+}
+
+// This is the global function responsible for iterating through given buffers.
+atomic_uintptr_t XRayBufferIterator{
+ reinterpret_cast<uintptr_t>(&NullBufferIterator)};
+
+// We use a linked list of Mode to XRayLogImpl mappings. A map would be the
+// natural structure, but we use a linked list to avoid depending on C++
+// standard library data structures at this level of the implementation.
+struct ModeImpl {
+ ModeImpl *Next;
+ const char *Mode;
+ XRayLogImpl Impl;
+};
+
+static ModeImpl SentinelModeImpl{
+ nullptr, nullptr, {nullptr, nullptr, nullptr, nullptr}};
+static ModeImpl *ModeImpls = &SentinelModeImpl;
+static const ModeImpl *CurrentMode = nullptr;
+
+} // namespace __xray
+
+using namespace __xray;
+
+void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer))
+ XRAY_NEVER_INSTRUMENT {
+ atomic_store(&__xray::XRayBufferIterator,
+ reinterpret_cast<uintptr_t>(Iterator), memory_order_release);
+}
+
+void __xray_log_remove_buffer_iterator() XRAY_NEVER_INSTRUMENT {
+ __xray_log_set_buffer_iterator(&NullBufferIterator);
+}
+
+XRayLogRegisterStatus
+__xray_log_register_mode(const char *Mode,
+ XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT {
+ if (Impl.flush_log == nullptr || Impl.handle_arg0 == nullptr ||
+ Impl.log_finalize == nullptr || Impl.log_init == nullptr)
+ return XRayLogRegisterStatus::XRAY_INCOMPLETE_IMPL;
+
+ SpinMutexLock Guard(&XRayImplMutex);
+ // First, look for whether the mode already has a registered implementation.
+ for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) {
+ if (!internal_strcmp(Mode, it->Mode))
+ return XRayLogRegisterStatus::XRAY_DUPLICATE_MODE;
+ }
+ auto *NewModeImpl = static_cast<ModeImpl *>(InternalAlloc(sizeof(ModeImpl)));
+ NewModeImpl->Next = ModeImpls;
+ NewModeImpl->Mode = internal_strdup(Mode);
+ NewModeImpl->Impl = Impl;
+ ModeImpls = NewModeImpl;
+ return XRayLogRegisterStatus::XRAY_REGISTRATION_OK;
+}
+
+XRayLogRegisterStatus
+__xray_log_select_mode(const char *Mode) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) {
+ if (!internal_strcmp(Mode, it->Mode)) {
+ CurrentMode = it;
+ CurrentXRayImpl = it->Impl;
+ GlobalXRayImpl = &CurrentXRayImpl;
+ __xray_set_handler(it->Impl.handle_arg0);
+ return XRayLogRegisterStatus::XRAY_REGISTRATION_OK;
+ }
+ }
+ return XRayLogRegisterStatus::XRAY_MODE_NOT_FOUND;
+}
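+// A hedged sketch of registering and selecting a custom mode. MyInit,
+// MyFinalize, MyHandleArg0, and MyFlush are hypothetical callbacks, and the
+// field order follows the XRayLogImpl declaration in xray_log_interface.h:
+//
+//   XRayLogImpl Impl{MyInit, MyFinalize, MyHandleArg0, MyFlush};
+//   if (__xray_log_register_mode("my-mode", Impl) ==
+//       XRayLogRegisterStatus::XRAY_REGISTRATION_OK)
+//     __xray_log_select_mode("my-mode");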
+
+const char *__xray_log_get_current_mode() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ if (CurrentMode != nullptr)
+ return CurrentMode->Mode;
+ return nullptr;
+}
+
+void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT {
+ if (Impl.log_init == nullptr || Impl.log_finalize == nullptr ||
+ Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) {
+ SpinMutexLock Guard(&XRayImplMutex);
+ GlobalXRayImpl = nullptr;
+ CurrentMode = nullptr;
+ __xray_remove_handler();
+ __xray_remove_handler_arg1();
+ return;
+ }
+
+ SpinMutexLock Guard(&XRayImplMutex);
+ CurrentXRayImpl = Impl;
+ GlobalXRayImpl = &CurrentXRayImpl;
+ __xray_set_handler(Impl.handle_arg0);
+}
+
+void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ GlobalXRayImpl = nullptr;
+ __xray_remove_handler();
+ __xray_remove_handler_arg1();
+}
+
+XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
+ void *Args,
+ size_t ArgsSize) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize);
+}
+
+XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config)
+ XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ if (Config == nullptr)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ // Check first whether the current mode is the same as what we expect.
+ if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ // Here we do some work to coerce the pointer we're provided, so that
+ // the implementations that still take void* pointers can handle the
+ // data provided in the Config argument.
+ return GlobalXRayImpl->log_init(
+ 0, 0, const_cast<void *>(static_cast<const void *>(Config)), 0);
+}
+
+XRayLogInitStatus
+__xray_log_init_mode_bin(const char *Mode, const char *Config,
+ size_t ConfigSize) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ if (Config == nullptr)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ // Check first whether the current mode is the same as what we expect.
+ if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ // Here we do some work to coerce the pointer we're provided, so that
+ // the implementations that still take void* pointers can handle the
+ // data provided in the Config argument.
+ return GlobalXRayImpl->log_init(
+ 0, 0, const_cast<void *>(static_cast<const void *>(Config)), ConfigSize);
+}
+
+XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ return GlobalXRayImpl->log_finalize();
+}
+
+XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ return GlobalXRayImpl->flush_log();
+}
+
+XRayLogFlushStatus __xray_log_process_buffers(
+ void (*Processor)(const char *, XRayBuffer)) XRAY_NEVER_INSTRUMENT {
+  // We want a consistent view of the buffers while iterating. The iterator
+  // is published atomically, so we load it with acquire ordering rather than
+  // taking a lock.
+ if (!GlobalXRayImpl)
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ auto Iterator = reinterpret_cast<XRayBuffer (*)(XRayBuffer)>(
+ atomic_load(&XRayBufferIterator, memory_order_acquire));
+ auto Buffer = (*Iterator)(XRayBuffer{nullptr, 0});
+ auto Mode = CurrentMode ? CurrentMode->Mode : nullptr;
+ while (Buffer.Data != nullptr) {
+ (*Processor)(Mode, Buffer);
+ Buffer = (*Iterator)(Buffer);
+ }
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
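+// Illustrative sketch of draining the installed iterator; the lambda body is
+// a placeholder for caller-defined persistence logic:
+//
+//   __xray_log_process_buffers(+[](const char *Mode, XRayBuffer B) {
+//     // Write B.Data .. B.Data + B.Size somewhere durable, tagged with Mode.
+//   });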
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp
new file mode 100644
index 000000000000..26fc50374471
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp
@@ -0,0 +1,170 @@
+//===-- xray_mips.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of MIPS-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+ PO_ADDIU = 0x24000000, // addiu rt, rs, imm
+ PO_SW = 0xAC000000, // sw rt, offset(sp)
+ PO_LUI = 0x3C000000, // lui rs, %hi(address)
+ PO_ORI = 0x34000000, // ori rt, rs, %lo(address)
+ PO_JALR = 0x0000F809, // jalr rs
+  PO_LW = 0x8C000000, // lw rt, offset(base)
+ PO_B44 = 0x1000000b, // b #44
+ PO_NOP = 0x0, // nop
+};
+
+enum RegNum : uint32_t {
+ RN_T0 = 0x8,
+ RN_T9 = 0x19,
+ RN_RA = 0x1F,
+ RN_SP = 0x1D,
+};
+
+inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs,
+ uint32_t Rt,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rs << 21 | Rt << 16 | Imm);
+}
+
+inline static uint32_t
+encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode);
+}
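+// Worked example: encodeInstruction(PO_SW, RN_SP, RN_RA, 0x4) composes
+// 0xAC000000 | 29 << 21 | 31 << 16 | 0x4 == 0xAFBF0004, i.e. "sw ra, 4(sp)",
+// which is exactly the instruction patched in at Sled.Address + 8 below.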
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true, we replace the following compile-time stub
+  // (sled):
+ //
+ // xray_sled_n:
+ // B .tmpN
+ // 11 NOPs (44 bytes)
+ // .tmpN
+ // ADDIU T9, T9, 44
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n (32-bit):
+ // addiu sp, sp, -8 ;create stack frame
+ // nop
+ // sw ra, 4(sp) ;save return address
+ // sw t9, 0(sp) ;save register t9
+ // lui t9, %hi(__xray_FunctionEntry/Exit)
+ // ori t9, t9, %lo(__xray_FunctionEntry/Exit)
+ // lui t0, %hi(function_id)
+ // jalr t9 ;call Tracing hook
+ // ori t0, t0, %lo(function_id) ;pass function id (delay slot)
+ // lw t9, 0(sp) ;restore register t9
+ // lw ra, 4(sp) ;restore return address
+ // addiu sp, sp, 8 ;delete stack frame
+ //
+  // We add 44 bytes to t9 because we want to adjust the function pointer to
+  // the actual start of the function, i.e. the address just after the nop
+  // sled. We do this because the gp displacement relocation is emitted at
+  // the start of the function, i.e. after the nop sled, and to correctly
+  // calculate the global offset table address, t9 must hold the address of
+  // the instruction containing the gp displacement relocation.
+  // FIXME: Is this correct for the static relocation model?
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+  // When |Enable| == false, we set the first instruction in the sled back to
+  //   B #44
+
+ if (Enable) {
+ uint32_t LoTracingHookAddr =
+ reinterpret_cast<int32_t>(TracingHook) & 0xffff;
+ uint32_t HiTracingHookAddr =
+ (reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff;
+ uint32_t LoFunctionID = FuncId & 0xffff;
+ uint32_t HiFunctionID = (FuncId >> 16) & 0xffff;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 8) = encodeInstruction(
+ PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_RA, 0x4);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 12) = encodeInstruction(
+ PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 16) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HiTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 20) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 24) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 28) = encodeSpecialInstruction(
+        PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 32) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 36) = encodeInstruction(
+ PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 40) = encodeInstruction(
+ PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_RA, 0x4);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 44) = encodeInstruction(
+ PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x8);
+ uint32_t CreateStackSpaceInstr = encodeInstruction(
+ PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xFFF8);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ uint32_t(CreateStackSpaceInstr), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ uint32_t(PatchOpcodes::PO_B44), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips?
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips?
+ return false;
+}
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp
new file mode 100644
index 000000000000..62c67ff7376d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp
@@ -0,0 +1,178 @@
+//===-- xray_mips64.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of MIPS64-specific routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+ PO_DADDIU = 0x64000000, // daddiu rt, rs, imm
+  PO_SD = 0xFC000000, // sd rt, offset(base)
+ PO_LUI = 0x3C000000, // lui rt, imm
+ PO_ORI = 0x34000000, // ori rt, rs, imm
+ PO_DSLL = 0x00000038, // dsll rd, rt, sa
+ PO_JALR = 0x00000009, // jalr rs
+  PO_LD = 0xDC000000, // ld rt, offset(base)
+ PO_B60 = 0x1000000f, // b #60
+ PO_NOP = 0x0, // nop
+};
+
+enum RegNum : uint32_t {
+ RN_T0 = 0xC,
+ RN_T9 = 0x19,
+ RN_RA = 0x1F,
+ RN_SP = 0x1D,
+};
+
+inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs,
+ uint32_t Rt,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rs << 21 | Rt << 16 | Imm);
+}
+
+inline static uint32_t
+encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode);
+}
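+// Worked example: encodeInstruction(PO_SD, RN_SP, RN_RA, 0x8) composes
+// 0xFC000000 | 29 << 21 | 31 << 16 | 0x8 == 0xFFBF0008, i.e. "sd ra, 8(sp)",
+// the instruction written at Sled.Address + 8 below.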
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true, we replace the following compile-time stub
+  // (sled):
+ //
+ // xray_sled_n:
+ // B .tmpN
+ // 15 NOPs (60 bytes)
+ // .tmpN
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n (64-bit):
+ // daddiu sp, sp, -16 ;create stack frame
+ // nop
+ // sd ra, 8(sp) ;save return address
+ // sd t9, 0(sp) ;save register t9
+ // lui t9, %highest(__xray_FunctionEntry/Exit)
+ // ori t9, t9, %higher(__xray_FunctionEntry/Exit)
+ // dsll t9, t9, 16
+ // ori t9, t9, %hi(__xray_FunctionEntry/Exit)
+ // dsll t9, t9, 16
+ // ori t9, t9, %lo(__xray_FunctionEntry/Exit)
+ // lui t0, %hi(function_id)
+ // jalr t9 ;call Tracing hook
+ // ori t0, t0, %lo(function_id) ;pass function id (delay slot)
+ // ld t9, 0(sp) ;restore register t9
+ // ld ra, 8(sp) ;restore return address
+ // daddiu sp, sp, 16 ;delete stack frame
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+  // When |Enable| == false, we set the first instruction in the sled back to
+  //   B #60
+
+ if (Enable) {
+ uint32_t LoTracingHookAddr =
+ reinterpret_cast<int64_t>(TracingHook) & 0xffff;
+ uint32_t HiTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
+ uint32_t HigherTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff;
+ uint32_t HighestTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 48) & 0xffff;
+ uint32_t LoFunctionID = FuncId & 0xffff;
+ uint32_t HiFunctionID = (FuncId >> 16) & 0xffff;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 8) = encodeInstruction(
+ PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_RA, 0x8);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 12) = encodeInstruction(
+ PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 16) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HighestTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 20) =
+ encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9,
+ HigherTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 24) = encodeSpecialInstruction(
+ PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 28) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, HiTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 32) = encodeSpecialInstruction(
+ PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 36) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 40) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 44) = encodeSpecialInstruction(
+        PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 48) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 52) = encodeInstruction(
+ PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 56) = encodeInstruction(
+ PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_RA, 0x8);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 60) = encodeInstruction(
+ PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x10);
+ uint32_t CreateStackSpace = encodeInstruction(
+ PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xfff0);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ CreateStackSpace, std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ uint32_t(PatchOpcodes::PO_B60), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips64?
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips64?
+ return false;
+}
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt b/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt
new file mode 100644
index 000000000000..7fa48dda7e16
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt
@@ -0,0 +1,6 @@
+# List of function matchers common to C/C++ applications that make sense to
+# never instrument. You can use this as an argument to
+# -fxray-never-instrument=<path> along with your project-specific lists.
+
+# Never instrument any function whose symbol starts with __xray.
+fun:__xray*
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp
new file mode 100644
index 000000000000..c3553d848313
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp
@@ -0,0 +1,113 @@
+//===-- xray_powerpc64.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of powerpc64 and powerpc64le routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include "xray_utils.h"
+#include <atomic>
+#include <cassert>
+#include <cstring>
+
+#ifndef __LITTLE_ENDIAN__
+#error powerpc64 big endian is not supported for now.
+#endif
+
+namespace {
+
+constexpr unsigned long long JumpOverInstNum = 7;
+
+void clearCache(void *Addr, size_t Len) {
+ const size_t LineSize = 32;
+
+ const intptr_t Mask = ~(LineSize - 1);
+ const intptr_t StartLine = ((intptr_t)Addr) & Mask;
+ const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask;
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("dcbf 0, %0" : : "r"(Line));
+ asm volatile("sync");
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("icbi 0, %0" : : "r"(Line));
+ asm volatile("isync");
+}
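+// Worked example with hypothetical values: clearCache((void *)0x10020, 8)
+// computes StartLine 0x10020 and EndLine 0x10040, so exactly one 32-byte
+// line is flushed with dcbf (followed by sync) and then invalidated with
+// icbi (followed by isync).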
+
+} // namespace
+
+extern "C" void __clear_cache(void *start, void *end);
+
+namespace __xray {
+
+bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ const uint64_t Address = Sled.address();
+ if (Enable) {
+    // lis 0, FuncId[16..31]
+    // ori 0, 0, FuncId[0..15]
+ *reinterpret_cast<uint64_t *>(Address) =
+ (0x3c000000ull + (FuncId >> 16)) +
+ ((0x60000000ull + (FuncId & 0xffff)) << 32);
+ } else {
+ // b +JumpOverInstNum instructions.
+ *reinterpret_cast<uint32_t *>(Address) =
+ 0x48000000ull + (JumpOverInstNum << 2);
+ }
+ clearCache(reinterpret_cast<void *>(Address), 8);
+ return true;
+}
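+// Worked example with a hypothetical id: for FuncId 0x00012345 the 64-bit
+// store above emits 0x3C000001 ("lis 0, 1") as the first word in
+// little-endian memory, followed by 0x60002345 ("ori 0, 0, 0x2345"),
+// leaving r0 == 0x00012345 when the patched sled executes.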
+
+bool patchFunctionExit(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ const uint64_t Address = Sled.address();
+ if (Enable) {
+    // lis 0, FuncId[16..31]
+    // ori 0, 0, FuncId[0..15]
+ *reinterpret_cast<uint64_t *>(Address) =
+ (0x3c000000ull + (FuncId >> 16)) +
+ ((0x60000000ull + (FuncId & 0xffff)) << 32);
+ } else {
+ // Copy the blr/b instruction after JumpOverInstNum instructions.
+ *reinterpret_cast<uint32_t *>(Address) =
+ *(reinterpret_cast<uint32_t *>(Address) + JumpOverInstNum);
+ }
+ clearCache(reinterpret_cast<void *>(Address), 8);
+ return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchFunctionExit(Enable, FuncId, Sled);
+}
+
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in powerpc64?
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in powerpc64?
+ return false;
+}
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc
new file mode 100644
index 000000000000..e4e16d5b28e0
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc
@@ -0,0 +1,36 @@
+//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <mutex>
+#include <sys/platform/ppc.h>
+
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ CPU = 0;
+ return __ppc_get_timebase();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ static std::mutex M;
+ std::lock_guard<std::mutex> Guard(M);
+ return __ppc_get_timebase_freq();
+}
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
+ return true;
+}
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp
new file mode 100644
index 000000000000..bef2504f2a16
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp
@@ -0,0 +1,414 @@
+//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the interface for the profileCollectorService.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_profile_collector.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_allocator.h"
+#include "xray_defs.h"
+#include "xray_profiling_flags.h"
+#include "xray_segmented_array.h"
+#include <memory>
+#include <pthread.h>
+#include <utility>
+
+namespace __xray {
+namespace profileCollectorService {
+
+namespace {
+
+SpinMutex GlobalMutex;
+struct ThreadTrie {
+ tid_t TId;
+ typename std::aligned_storage<sizeof(FunctionCallTrie)>::type TrieStorage;
+};
+
+struct ProfileBuffer {
+ void *Data;
+ size_t Size;
+};
+
+// Current version of the profile format.
+constexpr u64 XRayProfilingVersion = 0x20180424;
+
+// Identifier for XRay profiling files 'xrayprof' in hex.
+constexpr u64 XRayMagicBytes = 0x7872617970726f66;
+
+struct XRayProfilingFileHeader {
+ const u64 MagicBytes = XRayMagicBytes;
+ const u64 Version = XRayProfilingVersion;
+ u64 Timestamp = 0; // System time in nanoseconds.
+ u64 PID = 0; // Process ID.
+};
+
+struct BlockHeader {
+ u32 BlockSize;
+ u32 BlockNum;
+ u64 ThreadId;
+};
+
+struct ThreadData {
+ BufferQueue *BQ;
+ FunctionCallTrie::Allocators::Buffers Buffers;
+ FunctionCallTrie::Allocators Allocators;
+ FunctionCallTrie FCT;
+ tid_t TId;
+};
+
+using ThreadDataArray = Array<ThreadData>;
+using ThreadDataAllocator = ThreadDataArray::AllocatorType;
+
+// We use a separate buffer queue for the backing store for the allocator used
+// by the ThreadData array. This lets us host the buffers, allocators, and tries
+// associated with a thread by moving the data into the array instead of
+// attempting to copy the data to a separately backed set of tries.
+static typename std::aligned_storage<
+ sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+static BufferQueue::Buffer Buffer;
+static typename std::aligned_storage<sizeof(ThreadDataAllocator),
+ alignof(ThreadDataAllocator)>::type
+ ThreadDataAllocatorStorage;
+static typename std::aligned_storage<sizeof(ThreadDataArray),
+ alignof(ThreadDataArray)>::type
+ ThreadDataArrayStorage;
+
+static ThreadDataAllocator *TDAllocator = nullptr;
+static ThreadDataArray *TDArray = nullptr;
+
+using ProfileBufferArray = Array<ProfileBuffer>;
+using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType;
+
+// These need to be global aligned storage to avoid dynamic initialization. We
+// need these to be aligned to allow us to placement new objects into the
+// storage, and have pointers to those objects be appropriately aligned.
+static typename std::aligned_storage<sizeof(ProfileBufferArray)>::type
+ ProfileBuffersStorage;
+static typename std::aligned_storage<sizeof(ProfileBufferArrayAllocator)>::type
+ ProfileBufferArrayAllocatorStorage;
+
+static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
+static ProfileBufferArray *ProfileBuffers = nullptr;
+
+// Use a global flag to determine whether the collector implementation has been
+// initialized.
+static atomic_uint8_t CollectorInitialized{0};
+
+} // namespace
+
+void post(BufferQueue *Q, FunctionCallTrie &&T,
+ FunctionCallTrie::Allocators &&A,
+ FunctionCallTrie::Allocators::Buffers &&B,
+ tid_t TId) XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(Q, nullptr);
+
+ // Bail out early if the collector has not been initialized.
+ if (!atomic_load(&CollectorInitialized, memory_order_acquire)) {
+ T.~FunctionCallTrie();
+ A.~Allocators();
+ Q->releaseBuffer(B.NodeBuffer);
+ Q->releaseBuffer(B.RootsBuffer);
+ Q->releaseBuffer(B.ShadowStackBuffer);
+ Q->releaseBuffer(B.NodeIdPairBuffer);
+ B.~Buffers();
+ return;
+ }
+
+ {
+ SpinMutexLock Lock(&GlobalMutex);
+ DCHECK_NE(TDAllocator, nullptr);
+ DCHECK_NE(TDArray, nullptr);
+
+ if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T),
+ TId) == nullptr) {
+ // If we fail to add the data to the array, we should destroy the objects
+ // handed us.
+ T.~FunctionCallTrie();
+ A.~Allocators();
+ Q->releaseBuffer(B.NodeBuffer);
+ Q->releaseBuffer(B.RootsBuffer);
+ Q->releaseBuffer(B.ShadowStackBuffer);
+ Q->releaseBuffer(B.NodeIdPairBuffer);
+ B.~Buffers();
+ }
+ }
+}
+
+// A PathArray represents the function ids that make up a stack trace. In this
+// context a path is almost always represented from the leaf function in a
+// call stack to a root of the call trie.
+using PathArray = Array<int32_t>;
+
+struct ProfileRecord {
+ using PathAllocator = typename PathArray::AllocatorType;
+
+  // The Path in this record is the list of function ids from the leaf to the
+  // root of the function call stack, as represented in a FunctionCallTrie.
+ PathArray Path;
+ const FunctionCallTrie::Node *Node;
+};
+
+namespace {
+
+using ProfileRecordArray = Array<ProfileRecord>;
+
+// Walk a depth-first traversal of each root of the FunctionCallTrie to generate
+// the path(s) and the data associated with the path.
+static void
+populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
+ const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
+ using StackArray = Array<const FunctionCallTrie::Node *>;
+ using StackAllocator = typename StackArray::AllocatorType;
+ StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
+ StackArray DFSStack(StackAlloc);
+ for (const auto *R : Trie.getRoots()) {
+ DFSStack.Append(R);
+ while (!DFSStack.empty()) {
+ auto *Node = DFSStack.back();
+ DFSStack.trim(1);
+ if (Node == nullptr)
+ continue;
+ auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
+ if (Record == nullptr)
+ return;
+ DCHECK_NE(Record, nullptr);
+
+ // Traverse the Node's parents and as we're doing so, get the FIds in
+ // the order they appear.
+ for (auto N = Node; N != nullptr; N = N->Parent)
+ Record->Path.Append(N->FId);
+ DCHECK(!Record->Path.empty());
+
+ for (const auto C : Node->Callees)
+ DFSStack.Append(C.NodePtr);
+ }
+ }
+}
+
+static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
+ const ProfileRecordArray &ProfileRecords)
+ XRAY_NEVER_INSTRUMENT {
+ auto NextPtr = static_cast<uint8_t *>(
+ internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
+ sizeof(Header);
+ for (const auto &Record : ProfileRecords) {
+ // List of IDs follow:
+ for (const auto FId : Record.Path)
+ NextPtr =
+ static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
+ sizeof(FId);
+
+ // Add the sentinel here.
+ constexpr int32_t SentinelFId = 0;
+ NextPtr = static_cast<uint8_t *>(
+ internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
+ sizeof(SentinelFId);
+
+ // Add the node data here.
+ NextPtr =
+ static_cast<uint8_t *>(internal_memcpy(
+ NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) +
+ sizeof(Record.Node->CallCount);
+ NextPtr = static_cast<uint8_t *>(
+ internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
+ sizeof(Record.Node->CumulativeLocalTime))) +
+ sizeof(Record.Node->CumulativeLocalTime);
+ }
+
+ DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size);
+}
+
+} // namespace
+
+void serialize() XRAY_NEVER_INSTRUMENT {
+ if (!atomic_load(&CollectorInitialized, memory_order_acquire))
+ return;
+
+ SpinMutexLock Lock(&GlobalMutex);
+
+ // Clear out the global ProfileBuffers, if it's not empty.
+ for (auto &B : *ProfileBuffers)
+ deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
+ ProfileBuffers->trim(ProfileBuffers->size());
+
+ DCHECK_NE(TDArray, nullptr);
+ if (TDArray->empty())
+ return;
+
+ // Then repopulate the global ProfileBuffers.
+ u32 I = 0;
+ auto MaxSize = profilingFlags()->global_allocator_max;
+ auto ProfileArena = allocateBuffer(MaxSize);
+ if (ProfileArena == nullptr)
+ return;
+
+ auto ProfileArenaCleanup = at_scope_exit(
+ [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });
+
+ auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
+ if (PathArena == nullptr)
+ return;
+
+ auto PathArenaCleanup = at_scope_exit(
+ [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });
+
+ for (const auto &ThreadTrie : *TDArray) {
+ using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
+ ProfileRecordAllocator PRAlloc(ProfileArena,
+ profilingFlags()->global_allocator_max);
+ ProfileRecord::PathAllocator PathAlloc(
+ PathArena, profilingFlags()->global_allocator_max);
+ ProfileRecordArray ProfileRecords(PRAlloc);
+
+    // First, we want to compute the amount of space we're going to need. We
+    // use a local allocator and an __xray::Array<...> to store the
+    // intermediate data, computing the size as we go. Then we allocate the
+    // contiguous space to contain the thread buffer data.
+ if (ThreadTrie.FCT.getRoots().empty())
+ continue;
+
+ populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
+ DCHECK(!ThreadTrie.FCT.getRoots().empty());
+ DCHECK(!ProfileRecords.empty());
+
+ // Go through each record, to compute the sizes.
+ //
+ // header size = block size (4 bytes)
+ // + block number (4 bytes)
+ // + thread id (8 bytes)
+    // record size = path ids (4 bytes * number of ids + 4-byte sentinel)
+    //             + call count (8 bytes)
+    //             + local time (8 bytes)
+ u32 CumulativeSizes = 0;
+ for (const auto &Record : ProfileRecords)
+ CumulativeSizes += 20 + (4 * Record.Path.size());
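+    // For example, a record whose path holds three function ids contributes
+    // 20 + 4 * 3 == 32 bytes: 12 bytes of ids, a 4-byte sentinel, and two
+    // 8-byte counters.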
+
+ BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
+ auto B = ProfileBuffers->Append({});
+ B->Size = sizeof(Header) + CumulativeSizes;
+ B->Data = allocateBuffer(B->Size);
+ DCHECK_NE(B->Data, nullptr);
+ serializeRecords(B, Header, ProfileRecords);
+ }
+}
+
+void reset() XRAY_NEVER_INSTRUMENT {
+ atomic_store(&CollectorInitialized, 0, memory_order_release);
+ SpinMutexLock Lock(&GlobalMutex);
+
+ if (ProfileBuffers != nullptr) {
+ // Clear out the profile buffers that have been serialized.
+ for (auto &B : *ProfileBuffers)
+ deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
+ ProfileBuffers->trim(ProfileBuffers->size());
+ ProfileBuffers = nullptr;
+ }
+
+ if (TDArray != nullptr) {
+ // Release the resources as required.
+ for (auto &TD : *TDArray) {
+ TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
+ TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
+ TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
+ TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
+ }
+    // We don't bother destroying the array here because we've already
+    // potentially freed the backing store for the array. Instead we reset
+    // the pointer to nullptr and re-use the storage later (placement-new'ing
+    // into the storage as-is).
+ TDArray = nullptr;
+ }
+
+ if (TDAllocator != nullptr) {
+ TDAllocator->~Allocator();
+ TDAllocator = nullptr;
+ }
+
+ if (Buffer.Data != nullptr) {
+ BQ->releaseBuffer(Buffer);
+ }
+
+ if (BQ == nullptr) {
+ bool Success = false;
+ new (&BufferQueueStorage)
+ BufferQueue(profilingFlags()->global_allocator_max, 1, Success);
+ if (!Success)
+ return;
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+ } else {
+ BQ->finalize();
+
+ if (BQ->init(profilingFlags()->global_allocator_max, 1) !=
+ BufferQueue::ErrorCode::Ok)
+ return;
+ }
+
+ if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok)
+ return;
+
+ new (&ProfileBufferArrayAllocatorStorage)
+ ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
+ ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>(
+ &ProfileBufferArrayAllocatorStorage);
+
+ new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator);
+ ProfileBuffers =
+ reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage);
+
+ new (&ThreadDataAllocatorStorage)
+ ThreadDataAllocator(Buffer.Data, Buffer.Size);
+ TDAllocator =
+ reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage);
+ new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator);
+ TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage);
+
+ atomic_store(&CollectorInitialized, 1, memory_order_release);
+}
+
+XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock Lock(&GlobalMutex);
+
+ if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
+ return {nullptr, 0};
+
+ static pthread_once_t Once = PTHREAD_ONCE_INIT;
+ static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type
+ FileHeaderStorage;
+ pthread_once(
+ &Once, +[]() XRAY_NEVER_INSTRUMENT {
+ new (&FileHeaderStorage) XRayProfilingFileHeader{};
+ });
+
+ if (UNLIKELY(B.Data == nullptr)) {
+ // The first buffer should always contain the file header information.
+ auto &FileHeader =
+ *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
+ FileHeader.Timestamp = NanoTime();
+ FileHeader.PID = internal_getpid();
+ return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
+ }
+
+ if (UNLIKELY(B.Data == &FileHeaderStorage))
+ return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
+
+ BlockHeader Header;
+ internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
+ auto NextBlock = Header.BlockNum + 1;
+ if (NextBlock < ProfileBuffers->size())
+ return {(*ProfileBuffers)[NextBlock].Data,
+ (*ProfileBuffers)[NextBlock].Size};
+ return {nullptr, 0};
+}
+
+} // namespace profileCollectorService
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h
new file mode 100644
index 000000000000..6e0f252714ba
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h
@@ -0,0 +1,73 @@
+//===-- xray_profile_collector.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the interface for a data collection service, for XRay
+// profiling. What we implement here is an in-process service where
+// FunctionCallTrie instances can be handed off by threads, to be
+// consolidated/collected.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_PROFILE_COLLECTOR_H
+#define XRAY_XRAY_PROFILE_COLLECTOR_H
+
+#include "xray_function_call_trie.h"
+
+#include "xray/xray_log_interface.h"
+
+namespace __xray {
+
+/// The ProfileCollectorService implements a centralised mechanism for
+/// collecting FunctionCallTrie instances, indexed by thread ID. On demand, the
+/// ProfileCollectorService can be queried for the most recent state of the
+/// data, in a form that allows traversal.
+namespace profileCollectorService {
+
+/// Posts the FunctionCallTrie associated with a specific Thread ID. This
+/// moves the collection of FunctionCallTrie, Allocators, and Buffers
+/// associated with a thread's data to the queue, which takes ownership of
+/// the memory associated with the thread and manages it exclusively.
+///
+void post(BufferQueue *Q, FunctionCallTrie &&T,
+ FunctionCallTrie::Allocators &&A,
+ FunctionCallTrie::Allocators::Buffers &&B, tid_t TId);
+
+/// The serialize function processes all FunctionCallTrie instances in memory
+/// and turns them into specifically formatted blocks, each describing a
+/// function call trie's contents in a compact form. In memory, this looks
+/// like the following layout:
+///
+/// - block size (32 bits)
+/// - block number (32 bits)
+/// - thread id (64 bits)
+/// - list of records:
+/// - function ids in leaf to root order, terminated by
+/// 0 (32 bits per function id)
+///     - call count (64 bits)
+///     - cumulative local time (64 bits)
+///
+void serialize();
+
+/// The reset function will clear out any internal memory held by the
+/// service. The intent is to have the resetting be done in calls to the
+/// initialization routine, or explicitly through the flush log API.
+void reset();
+
+/// The nextBuffer function implements the iterator functionality provided in
+/// the XRay API.
+XRayBuffer nextBuffer(XRayBuffer B);
+
+} // namespace profileCollectorService
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_PROFILE_COLLECTOR_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
new file mode 100644
index 000000000000..ef16691562cc
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
@@ -0,0 +1,519 @@
+//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This is the implementation of a profiling handler.
+//
+//===----------------------------------------------------------------------===//
+#include <memory>
+#include <time.h>
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_log_interface.h"
+#include "xray_buffer_queue.h"
+#include "xray_flags.h"
+#include "xray_profile_collector.h"
+#include "xray_profiling_flags.h"
+#include "xray_recursion_guard.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+#include <pthread.h>
+
+namespace __xray {
+
+namespace {
+
+static atomic_sint32_t ProfilerLogFlushStatus = {
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+static atomic_sint32_t ProfilerLogStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+static SpinMutex ProfilerOptionsMutex;
+
+struct ProfilingData {
+ atomic_uintptr_t Allocators;
+ atomic_uintptr_t FCT;
+};
+
+static pthread_key_t ProfilingKey;
+
+// We use a global buffer queue, which is initialized once at start-up and
+// reset when profiling is "done".
+static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
+ BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+
+thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
+ alignof(FunctionCallTrie::Allocators)>::type
+ AllocatorsStorage;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie),
+ alignof(FunctionCallTrie)>::type
+ FunctionCallTrieStorage;
+thread_local ProfilingData TLD{{0}, {0}};
+thread_local atomic_uint8_t ReentranceGuard{0};
+
+// We use a separate guard to ensure that, if this thread is already cleaning
+// up, any signal handlers do not attempt to clean up or initialise the
+// thread-local data.
+thread_local atomic_uint8_t TLDInitGuard{0};
+
+// We also use a separate latch to signal that the thread is exiting, and
+// non-essential work should be ignored (things like recording events, etc.).
+thread_local atomic_uint8_t ThreadExitingLatch{0};
+
+static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
+ thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
+ pthread_setspecific(ProfilingKey, &TLD);
+ return false;
+ }();
+ (void)ThreadOnce;
+
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return nullptr;
+
+ if (atomic_load_relaxed(&ThreadExitingLatch))
+ return nullptr;
+
+ uptr Allocators = 0;
+ if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
+ memory_order_acq_rel)) {
+ bool Success = false;
+ auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ atomic_store(&TLD.Allocators, 0, memory_order_release);
+ });
+
+ // Acquire a set of buffers for this thread.
+ if (BQ == nullptr)
+ return nullptr;
+
+ if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
+ BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
+ BufferQueue::ErrorCode::Ok)
+ return nullptr;
+
+ Success = true;
+ new (&AllocatorsStorage) FunctionCallTrie::Allocators(
+ FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
+ Allocators = reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
+ atomic_store(&TLD.Allocators, Allocators, memory_order_release);
+ }
+
+ if (Allocators == 1)
+ return nullptr;
+
+ uptr FCT = 0;
+ if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
+ new (&FunctionCallTrieStorage)
+ FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
+ atomic_load_relaxed(&TLD.Allocators)));
+ FCT = reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
+ atomic_store(&TLD.FCT, FCT, memory_order_release);
+ }
+
+ if (FCT == 1)
+ return nullptr;
+
+ return &TLD;
+}
+
+static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
+ auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
+ if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
+ &FunctionCallTrieStorage)))
+ reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
+
+ auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
+ if (Allocators ==
+ reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+ reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
+}
+
+static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return;
+
+ uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
+ if (P != reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
+ return;
+
+ auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
+ DCHECK_NE(FCT, nullptr);
+
+ uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
+ if (A !=
+ reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+ return;
+
+ auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
+ DCHECK_NE(Allocators, nullptr);
+
+ // Always move the data into the profile collector.
+ profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
+ std::move(ThreadBuffers), GetTid());
+
+ // Re-initialize the ThreadBuffers object to a known "default" state.
+ ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
+}
+
+} // namespace
+
+const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
+ return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
+#else
+ return "";
+#endif
+}
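+
+// For illustration only: XRAY_PROFILER_DEFAULT_OPTIONS is expected to be
+// defined when building the runtime, as a flag string the parser
+// understands, e.g. (hypothetical values):
+//
+//   -DXRAY_PROFILER_DEFAULT_OPTIONS=no_flush=true:buffers_max=64
+//
+// SANITIZER_STRINGIFY then yields the literal "no_flush=true:buffers_max=64"
+// that profilingLoggingInit() feeds to the flag parser.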
+
+XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
+ if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
+ XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+ if (Verbosity())
+ Report("Not flushing profiles, profiling not been finalized.\n");
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ RecursionGuard SignalGuard(ReentranceGuard);
+ if (!SignalGuard) {
+ if (Verbosity())
+ Report("Cannot finalize properly inside a signal handler!\n");
+ atomic_store(&ProfilerLogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+ memory_order_release);
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+ memory_order_acq_rel);
+ if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
+ if (Verbosity())
+ Report("Not flushing profiles, implementation still flushing.\n");
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
+ }
+
+ // At this point, we'll create the file that will contain the profile, but
+ // only if the options say so.
+ if (!profilingFlags()->no_flush) {
+ // First check whether we have data in the profile collector service
+ // before we try and write anything down.
+ XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
+ if (B.Data == nullptr) {
+ if (Verbosity())
+ Report("profiling: No data to flush.\n");
+ } else {
+ LogWriter *LW = LogWriter::Open();
+ if (LW == nullptr) {
+ if (Verbosity())
+ Report("profiling: Failed to flush to file, dropping data.\n");
+ } else {
+ // Now for each of the buffers, write out the profile data as we would
+ // see it in memory, verbatim.
+ while (B.Data != nullptr && B.Size != 0) {
+ LW->WriteAll(reinterpret_cast<const char *>(B.Data),
+ reinterpret_cast<const char *>(B.Data) + B.Size);
+ B = profileCollectorService::nextBuffer(B);
+ }
+ }
+ LogWriter::Close(LW);
+ }
+ }
+
+ profileCollectorService::reset();
+
+ atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ memory_order_release);
+ atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
+
+void profilingHandleArg0(int32_t FuncId,
+ XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+ unsigned char CPU;
+ auto TSC = readTSC(CPU);
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return;
+
+ auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
+ if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
+ Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
+ return;
+
+ if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
+ Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
+ postCurrentThreadFCT(TLD);
+ return;
+ }
+
+ auto T = getThreadLocalData();
+ if (T == nullptr)
+ return;
+
+ auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
+ switch (Entry) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ FCT->enterFunction(FuncId, TSC, CPU);
+ break;
+ case XRayEntryType::EXIT:
+ case XRayEntryType::TAIL:
+ FCT->exitFunction(FuncId, TSC, CPU);
+ break;
+ default:
+ // FIXME: Handle bugs.
+ break;
+ }
+}
+
+void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
+ uint64_t) XRAY_NEVER_INSTRUMENT {
+ return profilingHandleArg0(FuncId, Entry);
+}
+
+XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+ if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_FINALIZING,
+ memory_order_release)) {
+ if (Verbosity())
+ Report("Cannot finalize profile, the profiling is not initialized.\n");
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
+
+  // Mark then finalize the current generation of buffers. This lets threads
+  // currently holding onto buffers keep using them, while the last reference
+  // does the memory cleanup.
+ DCHECK_NE(BQ, nullptr);
+ BQ->finalize();
+
+ // Wait a grace period to allow threads to see that we're finalizing.
+ SleepForMillis(profilingFlags()->grace_period_ms);
+
+ // If we for some reason are entering this function from an instrumented
+ // handler, we bail out.
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ // Post the current thread's data if we have any.
+ postCurrentThreadFCT(TLD);
+
+ // Then we force serialize the log data.
+ profileCollectorService::serialize();
+
+ atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+XRayLogInitStatus
+profilingLoggingInit(size_t, size_t, void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+ memory_order_acq_rel)) {
+ if (Verbosity())
+ Report("Cannot initialize already initialised profiling "
+ "implementation.\n");
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
+
+ {
+ SpinMutexLock Lock(&ProfilerOptionsMutex);
+ FlagParser ConfigParser;
+ ProfilerFlags Flags;
+ Flags.setDefaults();
+ registerProfilerFlags(&ConfigParser, &Flags);
+ ConfigParser.ParseString(profilingCompilerDefinedFlags());
+ const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
+ if (Env == nullptr)
+ Env = "";
+ ConfigParser.ParseString(Env);
+
+ // Then parse the configuration string provided.
+ ConfigParser.ParseString(static_cast<const char *>(Options));
+ if (Verbosity())
+ ReportUnrecognizedFlags();
+ *profilingFlags() = Flags;
+ }
+
+ // We need to reset the profile data collection implementation now.
+ profileCollectorService::reset();
+
+ // Then also reset the buffer queue implementation.
+ if (BQ == nullptr) {
+ bool Success = false;
+ new (&BufferQueueStorage)
+ BufferQueue(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max, Success);
+ if (!Success) {
+ if (Verbosity())
+ Report("Failed to initialize preallocated memory buffers!");
+ atomic_store(&ProfilerLogStatus,
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+    // If we've succeeded, set the global pointer to the initialized storage.
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+ } else {
+ BQ->finalize();
+ auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max);
+
+ if (InitStatus != BufferQueue::ErrorCode::Ok) {
+ if (Verbosity())
+ Report("Failed to initialize preallocated memory buffers; error: %s",
+ BufferQueue::getErrorString(InitStatus));
+ atomic_store(&ProfilerLogStatus,
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ DCHECK(!BQ->finalizing());
+ }
+
+ // We need to set up the exit handlers.
+ static pthread_once_t Once = PTHREAD_ONCE_INIT;
+ pthread_once(
+ &Once, +[] {
+ pthread_key_create(
+ &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
+ if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+ return;
+
+ if (P == nullptr)
+ return;
+
+ auto T = reinterpret_cast<ProfilingData *>(P);
+ if (atomic_load_relaxed(&T->Allocators) == 0)
+ return;
+
+ {
+ // If we're somehow executing this while inside a
+ // non-reentrant-friendly context, we skip attempting to post
+ // the current thread's data.
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return;
+
+ postCurrentThreadFCT(*T);
+ }
+ });
+
+ // We also need to set up an exit handler, so that we can get the
+ // profile information at exit time. We use the C API to do this, to not
+ // rely on C++ ABI functions for registering exit handlers.
+ Atexit(+[]() XRAY_NEVER_INSTRUMENT {
+ if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+ return;
+
+ auto Cleanup =
+ at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
+
+ // Finalize and flush.
+ if (profilingFinalize() != XRAY_LOG_FINALIZED ||
+ profilingFlush() != XRAY_LOG_FLUSHED)
+ return;
+
+ if (Verbosity())
+ Report("XRay Profile flushed at exit.");
+ });
+ });
+
+ __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
+ __xray_set_handler(profilingHandleArg0);
+ __xray_set_handler_arg1(profilingHandleArg1);
+
+ atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+ memory_order_release);
+ if (Verbosity())
+ Report("XRay Profiling init successful.\n");
+
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
+ // Set up the flag defaults from the static defaults and the
+ // compiler-provided defaults.
+ {
+ SpinMutexLock Lock(&ProfilerOptionsMutex);
+ auto *F = profilingFlags();
+ F->setDefaults();
+ FlagParser ProfilingParser;
+ registerProfilerFlags(&ProfilingParser, F);
+ ProfilingParser.ParseString(profilingCompilerDefinedFlags());
+ }
+
+ XRayLogImpl Impl{
+ profilingLoggingInit,
+ profilingFinalize,
+ profilingHandleArg0,
+ profilingFlush,
+ };
+ auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
+ if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+ if (Verbosity())
+ Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
+ "%d\n",
+ RegistrationResult);
+ return false;
+ }
+
+ if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
+    __xray_log_select_mode("xray-profiling");
+ return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::profilingDynamicInitializer();
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp
new file mode 100644
index 000000000000..0e89b7420f8c
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp
@@ -0,0 +1,39 @@
+//===-- xray_profiling_flags.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay profiling runtime flags.
+//===----------------------------------------------------------------------===//
+
+#include "xray_profiling_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+// Storage for the profiling flags.
+ProfilerFlags xray_profiling_flags_dont_use_directly;
+
+void ProfilerFlags::setDefaults() XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_profiling_flags.inc"
+#undef XRAY_FLAG
+}
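+
+// For reference, a sketch of what the X-macro above expands to for the real
+// no_flush entry in xray_profiling_flags.inc:
+//
+//   void ProfilerFlags::setDefaults() {
+//     no_flush = false;
+//     // ... one assignment per XRAY_FLAG entry ...
+//   }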
+
+void registerProfilerFlags(FlagParser *P,
+ ProfilerFlags *F) XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) \
+ RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_profiling_flags.inc"
+#undef XRAY_FLAG
+}
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h
new file mode 100644
index 000000000000..d67f240adc88
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h
@@ -0,0 +1,38 @@
+//===-- xray_profiling_flags.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay profiling runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_PROFILER_FLAGS_H
+#define XRAY_PROFILER_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+struct ProfilerFlags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_profiling_flags.inc"
+#undef XRAY_FLAG
+
+ void setDefaults();
+};
+
+extern ProfilerFlags xray_profiling_flags_dont_use_directly;
+inline ProfilerFlags *profilingFlags() {
+ return &xray_profiling_flags_dont_use_directly;
+}
+void registerProfilerFlags(FlagParser *P, ProfilerFlags *F);
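+
+// Sketched usage from runtime code, once the defaults have been set up:
+//
+//   if (profilingFlags()->no_flush)
+//     return;  // Profile files are not written in this configuration.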
+
+} // namespace __xray
+
+#endif // XRAY_PROFILER_FLAGS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc
new file mode 100644
index 000000000000..4f6138872af7
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc
@@ -0,0 +1,31 @@
+//===-- xray_profiling_flags.inc --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay profiling runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(uptr, per_thread_allocator_max, 16384,
+ "Maximum size of any single per-thread allocator.")
+XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
+ "Maximum size of the global allocator for profile storage.")
+XRAY_FLAG(uptr, stack_allocator_max, 2 << 20,
+ "Maximum size of the traversal stack allocator.")
+XRAY_FLAG(int, grace_period_ms, 1,
+ "Profile collection will wait this much time in milliseconds before "
+ "resetting the global state. This gives a chance to threads to "
+ "notice that the profiler has been finalized and clean up.")
+XRAY_FLAG(bool, no_flush, false,
+ "Set to true if we want the profiling implementation to not write "
+ "out files.")
+XRAY_FLAG(int, buffers_max, 128,
+ "The number of buffers to pre-allocate used by the profiling "
+ "implementation.")
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h
new file mode 100644
index 000000000000..3b6158a2d36c
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h
@@ -0,0 +1,56 @@
+//===-- xray_recursion_guard.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_RECURSION_GUARD_H
+#define XRAY_XRAY_RECURSION_GUARD_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+
+namespace __xray {
+
+/// The RecursionGuard is useful for guarding against signal handlers which are
+/// also potentially calling XRay-instrumented functions. To use the
+/// RecursionGuard, you'll typically need a thread_local atomic_uint8_t:
+///
+/// thread_local atomic_uint8_t Guard{0};
+///
+/// // In a handler function:
+/// void handleArg0(int32_t F, XRayEntryType T) {
+/// RecursionGuard G(Guard);
+/// if (!G)
+/// return; // Failed to acquire the guard.
+/// ...
+/// }
+///
+class RecursionGuard {
+ atomic_uint8_t &Running;
+ const bool Valid;
+
+public:
+ explicit inline RecursionGuard(atomic_uint8_t &R)
+ : Running(R), Valid(!atomic_exchange(&R, 1, memory_order_acq_rel)) {}
+
+ inline RecursionGuard(const RecursionGuard &) = delete;
+ inline RecursionGuard(RecursionGuard &&) = delete;
+ inline RecursionGuard &operator=(const RecursionGuard &) = delete;
+ inline RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+ explicit inline operator bool() const { return Valid; }
+
+ inline ~RecursionGuard() noexcept {
+ if (Valid)
+ atomic_store(&Running, 0, memory_order_release);
+ }
+};
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_RECURSION_GUARD_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h
new file mode 100644
index 000000000000..6eb673edffea
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h
@@ -0,0 +1,650 @@
+//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the implementation of a segmented array, with fixed-size segments
+// backing the array.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_SEGMENTED_ARRAY_H
+#define XRAY_SEGMENTED_ARRAY_H
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "xray_allocator.h"
+#include "xray_utils.h"
+#include <cassert>
+#include <type_traits>
+#include <utility>
+
+namespace __xray {
+
+/// The Array type provides an interface similar to std::vector<...> but does
+/// not shrink in size. Once constructed, elements can be appended, and can
+/// only be removed by trimming from the end (see trim()). The implementation
+/// is heavily dependent on the contract provided by the Allocator type, in
+/// that all memory will be released when the Allocator is destroyed. When an
+/// Array is destroyed, it will destroy elements in the backing store but will
+/// not free the memory.
+template <class T> class Array {
+ struct Segment {
+ Segment *Prev;
+ Segment *Next;
+ char Data[1];
+ };
+
+public:
+ // Each segment of the array will be laid out with the following assumptions:
+ //
+ // - Each segment will be on a cache-line address boundary (kCacheLineSize
+ // aligned).
+ //
+ // - The elements will be accessed through an aligned pointer, dependent on
+ // the alignment of T.
+ //
+  //   - Each element is at least two pointers' worth from the beginning of
+  //   the Segment, aligned properly, and the rest of the elements are
+  //   accessed through appropriate alignment.
+ //
+ // We then compute the size of the segment to follow this logic:
+ //
+ // - Compute the number of elements that can fit within
+ // kCacheLineSize-multiple segments, minus the size of two pointers.
+ //
+ // - Request cacheline-multiple sized elements from the allocator.
+ static constexpr uint64_t AlignedElementStorageSize =
+ sizeof(typename std::aligned_storage<sizeof(T), alignof(T)>::type);
+
+ static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2;
+
+ static constexpr uint64_t SegmentSize = nearest_boundary(
+ SegmentControlBlockSize + next_pow2(sizeof(T)), kCacheLineSize);
+
+ using AllocatorType = Allocator<SegmentSize>;
+
+ static constexpr uint64_t ElementsPerSegment =
+ (SegmentSize - SegmentControlBlockSize) / next_pow2(sizeof(T));
+
+ static_assert(ElementsPerSegment > 0,
+ "Must have at least 1 element per segment.");
+
+ static Segment SentinelSegment;
+
+ using size_type = uint64_t;
+
+private:
+ // This Iterator models a BidirectionalIterator.
+ template <class U> class Iterator {
+ Segment *S = &SentinelSegment;
+ uint64_t Offset = 0;
+ uint64_t Size = 0;
+
+ public:
+ Iterator(Segment *IS, uint64_t Off, uint64_t S) XRAY_NEVER_INSTRUMENT
+ : S(IS),
+ Offset(Off),
+ Size(S) {}
+ Iterator(const Iterator &) NOEXCEPT XRAY_NEVER_INSTRUMENT = default;
+ Iterator() NOEXCEPT XRAY_NEVER_INSTRUMENT = default;
+ Iterator(Iterator &&) NOEXCEPT XRAY_NEVER_INSTRUMENT = default;
+ Iterator &operator=(const Iterator &) XRAY_NEVER_INSTRUMENT = default;
+ Iterator &operator=(Iterator &&) XRAY_NEVER_INSTRUMENT = default;
+ ~Iterator() XRAY_NEVER_INSTRUMENT = default;
+
+ Iterator &operator++() XRAY_NEVER_INSTRUMENT {
+ if (++Offset % ElementsPerSegment || Offset == Size)
+ return *this;
+
+ // At this point, we know that Offset % N == 0, so we must advance the
+ // segment pointer.
+ DCHECK_EQ(Offset % ElementsPerSegment, 0);
+ DCHECK_NE(Offset, Size);
+ DCHECK_NE(S, &SentinelSegment);
+ DCHECK_NE(S->Next, &SentinelSegment);
+ S = S->Next;
+ DCHECK_NE(S, &SentinelSegment);
+ return *this;
+ }
+
+ Iterator &operator--() XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(S, &SentinelSegment);
+ DCHECK_GT(Offset, 0);
+
+ auto PreviousOffset = Offset--;
+ if (PreviousOffset != Size && PreviousOffset % ElementsPerSegment == 0) {
+ DCHECK_NE(S->Prev, &SentinelSegment);
+ S = S->Prev;
+ }
+
+ return *this;
+ }
+
+ Iterator operator++(int) XRAY_NEVER_INSTRUMENT {
+ Iterator Copy(*this);
+ ++(*this);
+ return Copy;
+ }
+
+ Iterator operator--(int) XRAY_NEVER_INSTRUMENT {
+ Iterator Copy(*this);
+ --(*this);
+ return Copy;
+ }
+
+ template <class V, class W>
+ friend bool operator==(const Iterator<V> &L,
+ const Iterator<W> &R) XRAY_NEVER_INSTRUMENT {
+ return L.S == R.S && L.Offset == R.Offset;
+ }
+
+ template <class V, class W>
+ friend bool operator!=(const Iterator<V> &L,
+ const Iterator<W> &R) XRAY_NEVER_INSTRUMENT {
+ return !(L == R);
+ }
+
+ U &operator*() const XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(S, &SentinelSegment);
+ auto RelOff = Offset % ElementsPerSegment;
+
+ // We need to compute the character-aligned pointer, offset from the
+ // segment's Data location to get the element in the position of Offset.
+ auto Base = &S->Data;
+ auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize);
+ return *reinterpret_cast<U *>(AlignedOffset);
+ }
+
+ U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); }
+ };
+
+ AllocatorType *Alloc;
+ Segment *Head;
+ Segment *Tail;
+
+ // Here we keep track of segments in the freelist, to allow us to re-use
+ // segments when elements are trimmed off the end.
+ Segment *Freelist;
+ uint64_t Size;
+
+ // ===============================
+ // In the following implementation, we work through the algorithms and the
+ // list operations using the following notation:
+ //
+ // - pred(s) is the predecessor (previous node accessor) and succ(s) is
+ // the successor (next node accessor).
+ //
+ // - S is a sentinel segment, which has the following property:
+ //
+ // pred(S) == succ(S) == S
+ //
+ // - @ is a loop operator, which can imply pred(s) == s if it appears on
+  //     the left of s, or succ(s) == s if it appears on the right of s.
+ //
+ // - sL <-> sR : means a bidirectional relation between sL and sR, which
+ // means:
+ //
+  //       succ(sL) == sR && pred(sR) == sL
+ //
+  //   - sL -> sR : implies a unidirectional relation between sL and sR,
+ // with the following properties:
+ //
+ // succ(sL) == sR
+ //
+ // sL <- sR : implies a unidirectional relation between sR and sL,
+ // with the following properties:
+ //
+ // pred(sR) == sL
+ //
+ // ===============================
+
+ Segment *NewSegment() XRAY_NEVER_INSTRUMENT {
+ // We need to handle the case in which enough elements have been trimmed to
+ // allow us to re-use segments we've allocated before. For this we look into
+ // the Freelist, to see whether we need to actually allocate new blocks or
+ // just re-use blocks we've already seen before.
+ if (Freelist != &SentinelSegment) {
+ // The current state of lists resemble something like this at this point:
+ //
+ // Freelist: @S@<-f0->...<->fN->@S@
+ // ^ Freelist
+ //
+ // We want to perform a splice of `f0` from Freelist to a temporary list,
+ // which looks like:
+ //
+ // Templist: @S@<-f0->@S@
+ // ^ FreeSegment
+ //
+ // Our algorithm preconditions are:
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+
+ // Then the algorithm we implement is:
+ //
+ // SFS = Freelist
+ // Freelist = succ(Freelist)
+ // if (Freelist != S)
+ // pred(Freelist) = S
+ // succ(SFS) = S
+ // pred(SFS) = S
+ //
+ auto *FreeSegment = Freelist;
+ Freelist = Freelist->Next;
+
+ // Note that we need to handle the case where Freelist is now pointing to
+ // S, which we don't want to be overwriting.
+ // TODO: Determine whether the cost of the branch is higher than the cost
+ // of the blind assignment.
+ if (Freelist != &SentinelSegment)
+ Freelist->Prev = &SentinelSegment;
+
+ FreeSegment->Next = &SentinelSegment;
+ FreeSegment->Prev = &SentinelSegment;
+
+ // Our postconditions are:
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+ DCHECK_NE(FreeSegment, &SentinelSegment);
+ return FreeSegment;
+ }
+
+ auto SegmentBlock = Alloc->Allocate();
+ if (SegmentBlock.Data == nullptr)
+ return nullptr;
+
+ // Placement-new the Segment element at the beginning of the SegmentBlock.
+ new (SegmentBlock.Data) Segment{&SentinelSegment, &SentinelSegment, {0}};
+ auto SB = reinterpret_cast<Segment *>(SegmentBlock.Data);
+ return SB;
+ }
+
+ Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT {
+ DCHECK_EQ(Head, &SentinelSegment);
+ DCHECK_EQ(Tail, &SentinelSegment);
+ auto S = NewSegment();
+ if (S == nullptr)
+ return nullptr;
+ DCHECK_EQ(S->Next, &SentinelSegment);
+ DCHECK_EQ(S->Prev, &SentinelSegment);
+ DCHECK_NE(S, &SentinelSegment);
+ Head = S;
+ Tail = S;
+ DCHECK_EQ(Head, Tail);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(Tail->Prev, &SentinelSegment);
+ return S;
+ }
+
+ Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT {
+ auto S = NewSegment();
+ if (S == nullptr)
+ return nullptr;
+ DCHECK_NE(Tail, &SentinelSegment);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(S->Prev, &SentinelSegment);
+ DCHECK_EQ(S->Next, &SentinelSegment);
+ S->Prev = Tail;
+ Tail->Next = S;
+ Tail = S;
+ DCHECK_EQ(S, S->Prev->Next);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ return S;
+ }
+
+public:
+ explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT
+ : Alloc(&A),
+ Head(&SentinelSegment),
+ Tail(&SentinelSegment),
+ Freelist(&SentinelSegment),
+ Size(0) {}
+
+ Array() XRAY_NEVER_INSTRUMENT : Alloc(nullptr),
+ Head(&SentinelSegment),
+ Tail(&SentinelSegment),
+ Freelist(&SentinelSegment),
+ Size(0) {}
+
+ Array(const Array &) = delete;
+ Array &operator=(const Array &) = delete;
+
+ Array(Array &&O) XRAY_NEVER_INSTRUMENT : Alloc(O.Alloc),
+ Head(O.Head),
+ Tail(O.Tail),
+ Freelist(O.Freelist),
+ Size(O.Size) {
+ O.Alloc = nullptr;
+ O.Head = &SentinelSegment;
+ O.Tail = &SentinelSegment;
+ O.Size = 0;
+ O.Freelist = &SentinelSegment;
+ }
+
+ Array &operator=(Array &&O) XRAY_NEVER_INSTRUMENT {
+ Alloc = O.Alloc;
+ O.Alloc = nullptr;
+ Head = O.Head;
+ O.Head = &SentinelSegment;
+ Tail = O.Tail;
+ O.Tail = &SentinelSegment;
+ Freelist = O.Freelist;
+ O.Freelist = &SentinelSegment;
+ Size = O.Size;
+ O.Size = 0;
+ return *this;
+ }
+
+ ~Array() XRAY_NEVER_INSTRUMENT {
+ for (auto &E : *this)
+ (&E)->~T();
+ }
+
+ bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; }
+
+ AllocatorType &allocator() const XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(Alloc, nullptr);
+ return *Alloc;
+ }
+
+ uint64_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
+
+ template <class... Args>
+ T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
+ DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+ (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+ if (UNLIKELY(Head == &SentinelSegment)) {
+ auto R = InitHeadAndTail();
+ if (R == nullptr)
+ return nullptr;
+ }
+
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Tail, &SentinelSegment);
+
+ auto Offset = Size % ElementsPerSegment;
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewSegment() == nullptr)
+ return nullptr;
+
+ DCHECK_NE(Tail, &SentinelSegment);
+ auto Base = &Tail->Data;
+ auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+ DCHECK_LE(AlignedOffset + sizeof(T),
+              reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+
+ // In-place construct at Position.
+ new (AlignedOffset) T{std::forward<Args>(args)...};
+ ++Size;
+ return reinterpret_cast<T *>(AlignedOffset);
+ }
+
+ T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
+    // FIXME: This is a duplication of AppendEmplace with the copy semantics
+    // explicitly used, as a workaround for GCC 4.8 not invoking the copy
+    // constructor with placement new using braced-init syntax.
+ DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+ (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+ if (UNLIKELY(Head == &SentinelSegment)) {
+ auto R = InitHeadAndTail();
+ if (R == nullptr)
+ return nullptr;
+ }
+
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Tail, &SentinelSegment);
+
+ auto Offset = Size % ElementsPerSegment;
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewSegment() == nullptr)
+ return nullptr;
+
+ DCHECK_NE(Tail, &SentinelSegment);
+ auto Base = &Tail->Data;
+ auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+ DCHECK_LE(AlignedOffset + sizeof(T),
+ reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+
+ // In-place construct at Position.
+ new (AlignedOffset) T(E);
+ ++Size;
+ return reinterpret_cast<T *>(AlignedOffset);
+ }
+
+ T &operator[](uint64_t Offset) const XRAY_NEVER_INSTRUMENT {
+ DCHECK_LE(Offset, Size);
+ // We need to traverse the array enough times to find the element at Offset.
+ auto S = Head;
+ while (Offset >= ElementsPerSegment) {
+ S = S->Next;
+ Offset -= ElementsPerSegment;
+ DCHECK_NE(S, &SentinelSegment);
+ }
+ auto Base = &S->Data;
+ auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+ auto Position = reinterpret_cast<T *>(AlignedOffset);
+ return *reinterpret_cast<T *>(Position);
+ }
+
+ T &front() const XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Size, 0u);
+ return *begin();
+ }
+
+ T &back() const XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(Tail, &SentinelSegment);
+ DCHECK_NE(Size, 0u);
+ auto It = end();
+ --It;
+ return *It;
+ }
+
+ template <class Predicate>
+ T *find_element(Predicate P) const XRAY_NEVER_INSTRUMENT {
+ if (empty())
+ return nullptr;
+
+ auto E = end();
+ for (auto I = begin(); I != E; ++I)
+ if (P(*I))
+ return &(*I);
+
+ return nullptr;
+ }
+
+  /// Remove N elements from the end. This leaves the blocks behind, and does
+  /// not require allocation of new blocks for new elements added after
+  /// trimming.
+ void trim(uint64_t Elements) XRAY_NEVER_INSTRUMENT {
+ auto OldSize = Size;
+ Elements = Elements > Size ? Size : Elements;
+ Size -= Elements;
+
+ // We compute the number of segments we're going to return from the tail by
+ // counting how many elements have been trimmed. Given the following:
+ //
+ // - Each segment has N valid positions, where N > 0
+ // - The previous size > current size
+ //
+ // To compute the number of segments to return, we need to perform the
+ // following calculations for the number of segments required given 'x'
+ // elements:
+ //
+ // f(x) = {
+ // x == 0 : 0
+ // , 0 < x <= N : 1
+ // , N < x <= max : x / N + (x % N ? 1 : 0)
+ // }
+ //
+ // We can simplify this down to:
+ //
+ // f(x) = {
+ // x == 0 : 0,
+ // , 0 < x <= max : x / N + (x < N || x % N ? 1 : 0)
+ // }
+ //
+ // And further down to:
+ //
+ // f(x) = x ? x / N + (x < N || x % N ? 1 : 0) : 0
+ //
+ // We can then perform the following calculation `s` which counts the number
+ // of segments we need to remove from the end of the data structure:
+ //
+ // s(p, c) = f(p) - f(c)
+ //
+ // If we treat p = previous size, and c = current size, and given the
+ // properties above, the possible range for s(...) is [0..max(typeof(p))/N]
+ // given that typeof(p) == typeof(c).
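+    //
+    // Worked example (hypothetical sizes): with N == 4, trimming from a
+    // previous size p == 10 to a current size c == 5 gives
+    // f(10) == 10/4 + 1 == 3 and f(5) == 5/4 + 1 == 2, so s(10, 5) == 1
+    // segment is returned to the freelist.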
+ auto F = [](uint64_t X) {
+ return X ? (X / ElementsPerSegment) +
+ (X < ElementsPerSegment || X % ElementsPerSegment ? 1 : 0)
+ : 0;
+ };
+ auto PS = F(OldSize);
+ auto CS = F(Size);
+ DCHECK_GE(PS, CS);
+ auto SegmentsToTrim = PS - CS;
+ for (auto I = 0uL; I < SegmentsToTrim; ++I) {
+ // Here we place the current tail segment to the freelist. To do this
+ // appropriately, we need to perform a splice operation on two
+ // bidirectional linked-lists. In particular, we have the current state of
+ // the doubly-linked list of segments:
+ //
+ // @S@ <- s0 <-> s1 <-> ... <-> sT -> @S@
+ //
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Tail, &SentinelSegment);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+
+ if (Freelist == &SentinelSegment) {
+ // Our two lists at this point are in this configuration:
+ //
+ // Freelist: (potentially) @S@
+ // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@
+ // ^ Head ^ Tail
+ //
+ // The end state for us will be this configuration:
+ //
+ // Freelist: @S@<-sT->@S@
+ // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@
+ // ^ Head ^ Tail
+ //
+ // The first step for us is to hold a reference to the tail of Mainlist,
+ // which in our notation is represented by sT. We call this our "free
+ // segment" which is the segment we are placing on the Freelist.
+ //
+ // sF = sT
+ //
+ // Then, we also hold a reference to the "pre-tail" element, which we
+ // call sPT:
+ //
+ // sPT = pred(sT)
+ //
+ // We want to splice sT into the beginning of the Freelist, which in
+ // an empty Freelist means placing a segment whose predecessor and
+ // successor is the sentinel segment.
+ //
+ // The splice operation then can be performed in the following
+ // algorithm:
+ //
+ // succ(sPT) = S
+ // pred(sT) = S
+ // succ(sT) = Freelist
+ // Freelist = sT
+ // Tail = sPT
+ //
+ auto SPT = Tail->Prev;
+ SPT->Next = &SentinelSegment;
+ Tail->Prev = &SentinelSegment;
+ Tail->Next = Freelist;
+ Freelist = Tail;
+ Tail = SPT;
+
+ // Our post-conditions here are:
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+ } else {
+ // In the other case, where the Freelist is not empty, we perform the
+ // following transformation instead:
+ //
+ // This transforms the current state:
+ //
+ // Freelist: @S@<-f0->@S@
+ // ^ Freelist
+ // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@
+ // ^ Head ^ Tail
+ //
+ // Into the following:
+ //
+ // Freelist: @S@<-sT<->f0->@S@
+ // ^ Freelist
+ // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@
+ // ^ Head ^ Tail
+ //
+ // The algorithm is:
+ //
+ // sFH = Freelist
+ // sPT = pred(sT)
+ // pred(SFH) = sT
+ // succ(sT) = Freelist
+ // pred(sT) = S
+ // succ(sPT) = S
+ // Tail = sPT
+ // Freelist = sT
+ //
+ auto SFH = Freelist;
+ auto SPT = Tail->Prev;
+ auto ST = Tail;
+ SFH->Prev = ST;
+ ST->Next = Freelist;
+ ST->Prev = &SentinelSegment;
+ SPT->Next = &SentinelSegment;
+ Tail = SPT;
+ Freelist = ST;
+
+ // Our post-conditions here are:
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+ DCHECK_EQ(Freelist->Next->Prev, Freelist);
+ }
+ }
+
+ // Now in case we've spliced all the segments in the end, we ensure that the
+ // main list is "empty", or both the head and tail pointing to the sentinel
+ // segment.
+ if (Tail == &SentinelSegment)
+ Head = Tail;
+
+ DCHECK(
+ (Size == 0 && Head == &SentinelSegment && Tail == &SentinelSegment) ||
+ (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+ DCHECK(
+ (Freelist != &SentinelSegment && Freelist->Prev == &SentinelSegment) ||
+ (Freelist == &SentinelSegment && Tail->Next == &SentinelSegment));
+ }
+
+ // Provide iterators.
+ Iterator<T> begin() const XRAY_NEVER_INSTRUMENT {
+ return Iterator<T>(Head, 0, Size);
+ }
+ Iterator<T> end() const XRAY_NEVER_INSTRUMENT {
+ return Iterator<T>(Tail, Size, Size);
+ }
+ Iterator<const T> cbegin() const XRAY_NEVER_INSTRUMENT {
+ return Iterator<const T>(Head, 0, Size);
+ }
+ Iterator<const T> cend() const XRAY_NEVER_INSTRUMENT {
+ return Iterator<const T>(Tail, Size, Size);
+ }
+};
+
+// We need to have this storage definition out-of-line so that the compiler can
+// ensure that storage for the SentinelSegment is defined and has a single
+// address.
+template <class T>
+typename Array<T>::Segment Array<T>::SentinelSegment{
+ &Array<T>::SentinelSegment, &Array<T>::SentinelSegment, {'\0'}};
+
+} // namespace __xray
+
+#endif // XRAY_SEGMENTED_ARRAY_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S
new file mode 100644
index 000000000000..3bf52cef60fe
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S
@@ -0,0 +1,163 @@
+#include "../builtins/assembly.h"
+
+ .text
+ /* The variable containing the handler function pointer */
+ .global _ZN6__xray19XRayPatchedFunctionE
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionEntry
+ .hidden __xray_FunctionEntry
+ .type __xray_FunctionEntry, %function
+    /* In C++ it is extern "C" void __xray_FunctionEntry(uint32_t FuncId) with
+ FuncId passed in W0 register. */
+__xray_FunctionEntry:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ STP Q0, Q1, [SP, #-32]!
+ STP Q2, Q3, [SP, #-32]!
+ STP Q4, Q5, [SP, #-32]!
+ STP Q6, Q7, [SP, #-32]!
+ /* X8 is the indirect result register and needs to be preserved for the body
+ of the function to use */
+ STP X8, X0, [SP, #-16]!
+
+ /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ ADRP X1, _ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ BEQ FunctionEntry_restore
+ /* Function ID is already in W0 (the first parameter).
+ X1=0 means that we are tracing an entry event */
+ MOV X1, #0
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionEntry_restore:
+ /* Pop the saved registers */
+ LDP X8, X0, [SP], #16
+ LDP Q6, Q7, [SP], #32
+ LDP Q4, Q5, [SP], #32
+ LDP Q2, Q3, [SP], #32
+ LDP Q0, Q1, [SP], #32
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
+
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionExit
+ .hidden __xray_FunctionExit
+ .type __xray_FunctionExit, %function
+    /* In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
+ FuncId passed in W0 register. */
+__xray_FunctionExit:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ STP Q0, Q1, [SP, #-32]!
+ STP Q2, Q3, [SP, #-32]!
+ STP Q4, Q5, [SP, #-32]!
+ STP Q6, Q7, [SP, #-32]!
+ /* X8 is the indirect result register and needs to be preserved for the body
+ of the function to use */
+ STP X8, X0, [SP, #-16]!
+
+ /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ ADRP X1, _ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ BEQ FunctionExit_restore
+ /* Function ID is already in W0 (the first parameter).
+ X1=1 means that we are tracing an exit event */
+ MOV X1, #1
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionExit_restore:
+ LDP X8, X0, [SP], #16
+ LDP Q6, Q7, [SP], #32
+ LDP Q4, Q5, [SP], #32
+ LDP Q2, Q3, [SP], #32
+ LDP Q0, Q1, [SP], #32
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
+
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionTailExit
+ .hidden __xray_FunctionTailExit
+ .type __xray_FunctionTailExit, %function
+    /* In C++ it is extern "C" void __xray_FunctionTailExit(uint32_t FuncId)
+ with FuncId passed in W0 register. */
+__xray_FunctionTailExit:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ /* Push the parameters of the tail called function */
+ STP Q0, Q1, [SP, #-32]!
+ STP Q2, Q3, [SP, #-32]!
+ STP Q4, Q5, [SP, #-32]!
+ STP Q6, Q7, [SP, #-32]!
+ /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ ADRP X1, _ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ BEQ FunctionTailExit_restore
+ /* Function ID is already in W0 (the first parameter).
+       X1=2 would mean that we are tracing a tail exit event, but until the
+       logging part of XRay handles tail exits, we pretend that a normal
+       function exit happens here, so we give the handler code 1 */
+ MOV X1, #1
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionTailExit_restore:
+ /* Pop the parameters of the tail called function */
+ LDP Q6, Q7, [SP], #32
+ LDP Q4, Q5, [SP], #32
+ LDP Q2, Q3, [SP], #32
+ LDP Q0, Q1, [SP], #32
+ /* Pop the registers which may be modified by the handler function */
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S
new file mode 100644
index 000000000000..3ffc1e443761
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S
@@ -0,0 +1,105 @@
+#include "../builtins/assembly.h"
+
+ .syntax unified
+ .arch armv6t2
+ .fpu vfpv2
+ .code 32
+ .global _ZN6__xray19XRayPatchedFunctionE
+
+ @ Word-aligned function entry point
+ .p2align 2
+ @ Let C/C++ see the symbol
+ .global __xray_FunctionEntry
+ .hidden __xray_FunctionEntry
+ @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+ @ Assume that "q" part of the floating-point registers is not used
+ @ for passing parameters to C/C++ functions.
+ .type __xray_FunctionEntry, %function
+  @ In C++ it is extern "C" void __xray_FunctionEntry(uint32_t FuncId) with
+ @ FuncId passed in r0 register.
+__xray_FunctionEntry:
+ PUSH {r1-r3,lr}
+ @ Save floating-point parameters of the instrumented function
+ VPUSH {d0-d7}
+ MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16)
+ MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12)
+ LDR r2, [pc, r1]
+ @ Handler address is nullptr if handler is not set
+ CMP r2, #0
+ BEQ FunctionEntry_restore
+ @ Function ID is already in r0 (the first parameter).
+ @ r1=0 means that we are tracing an entry event
+ MOV r1, #0
+ @ Call the handler with 2 parameters in r0 and r1
+ BLX r2
+FunctionEntry_restore:
+ @ Restore floating-point parameters of the instrumented function
+ VPOP {d0-d7}
+ POP {r1-r3,pc}
+
+ @ Word-aligned function entry point
+ .p2align 2
+ @ Let C/C++ see the symbol
+ .global __xray_FunctionExit
+ .hidden __xray_FunctionExit
+ @ Assume that d1-d7 are not used for the return value.
+ @ Assume that "q" part of the floating-point registers is not used for the
+ @ return value in C/C++.
+ .type __xray_FunctionExit, %function
+ @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
+ @ FuncId passed in r0 register.
+__xray_FunctionExit:
+ PUSH {r1-r3,lr}
+ @ Save the floating-point return value of the instrumented function
+ VPUSH {d0}
+ @ Load the handler address
+ MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16)
+ MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12)
+ LDR r2, [pc, r1]
+ @ Handler address is nullptr if handler is not set
+ CMP r2, #0
+ BEQ FunctionExit_restore
+ @ Function ID is already in r0 (the first parameter).
+ @ 1 means that we are tracing an exit event
+ MOV r1, #1
+ @ Call the handler with 2 parameters in r0 and r1
+ BLX r2
+FunctionExit_restore:
+ @ Restore the floating-point return value of the instrumented function
+ VPOP {d0}
+ POP {r1-r3,pc}
+
+ @ Word-aligned function entry point
+ .p2align 2
+ @ Let C/C++ see the symbol
+ .global __xray_FunctionTailExit
+ .hidden __xray_FunctionTailExit
+ @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+ @ Assume that "q" part of the floating-point registers is not used
+ @ for passing parameters to C/C++ functions.
+ .type __xray_FunctionTailExit, %function
+  @ In C++ it is extern "C" void __xray_FunctionTailExit(uint32_t FuncId)
+ @ with FuncId passed in r0 register.
+__xray_FunctionTailExit:
+ PUSH {r1-r3,lr}
+ @ Save floating-point parameters of the instrumented function
+ VPUSH {d0-d7}
+ MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16)
+ MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12)
+ LDR r2, [pc, r1]
+ @ Handler address is nullptr if handler is not set
+ CMP r2, #0
+ BEQ FunctionTailExit_restore
+ @ Function ID is already in r0 (the first parameter).
+  @ r1=2 would mean that we are tracing a tail exit event, but until the
+  @ logging part of XRay handles tail exits, we pretend that a normal
+  @ function exit happens here, so we give the handler code 1
+ MOV r1, #1
+ @ Call the handler with 2 parameters in r0 and r1
+ BLX r2
+FunctionTailExit_restore:
+ @ Restore floating-point parameters of the instrumented function
+ VPOP {d0-d7}
+ POP {r1-r3,pc}
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S
new file mode 100644
index 000000000000..499c350d2a24
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S
@@ -0,0 +1,109 @@
+//===-- xray_trampoline_mips.s ----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the MIPS-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+ .file "xray_trampoline_mips.S"
+ .globl __xray_FunctionEntry
+ .p2align 2
+ .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+ .cfi_startproc
+ .set noreorder
+ .cpload $t9
+ .set reorder
+ // Save argument registers before doing any actual work
+ .cfi_def_cfa_offset 36
+ addiu $sp, $sp, -36
+ sw $ra, 32($sp)
+ .cfi_offset 31, -4
+ sw $a3, 28($sp)
+ sw $a2, 24($sp)
+ sw $a1, 20($sp)
+ sw $a0, 16($sp)
+ sdc1 $f14, 8($sp)
+ sdc1 $f12, 0($sp)
+
+ la $t9, _ZN6__xray19XRayPatchedFunctionE
+ lw $t9, 0($t9)
+
+ beqz $t9, FunctionEntry_restore
+
+ // a1=0 means that we are tracing an entry event
+ move $a1, $zero
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionEntry_restore:
+ // Restore argument registers
+ ldc1 $f12, 0($sp)
+ ldc1 $f14, 8($sp)
+ lw $a0, 16($sp)
+ lw $a1, 20($sp)
+ lw $a2, 24($sp)
+ lw $a3, 28($sp)
+ lw $ra, 32($sp)
+ addiu $sp, $sp, 36
+ jr $ra
+FunctionEntry_end:
+ .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry
+ .cfi_endproc
+
+ .text
+ .globl __xray_FunctionExit
+ .p2align 2
+ .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+ .cfi_startproc
+ .set noreorder
+ .cpload $t9
+ .set reorder
+ // Save return registers before doing any actual work.
+ .cfi_def_cfa_offset 36
+ addiu $sp, $sp, -36
+ sw $ra, 32($sp)
+ .cfi_offset 31, -4
+ sw $a1, 28($sp)
+ sw $a0, 24($sp)
+ sw $v1, 20($sp)
+ sw $v0, 16($sp)
+ sdc1 $f2, 8($sp)
+ sdc1 $f0, 0($sp)
+
+ la $t9, _ZN6__xray19XRayPatchedFunctionE
+ lw $t9, 0($t9)
+
+ beqz $t9, FunctionExit_restore
+
+ // a1=1 means that we are tracing an exit event
+ li $a1, 1
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionExit_restore:
+ // Restore return registers
+ ldc1 $f0, 0($sp)
+ ldc1 $f2, 8($sp)
+ lw $v0, 16($sp)
+ lw $v1, 20($sp)
+ lw $a0, 24($sp)
+ lw $a1, 28($sp)
+ lw $ra, 32($sp)
+ addiu $sp, $sp, 36
+ jr $ra
+
+FunctionExit_end:
+ .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit
+ .cfi_endproc
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S
new file mode 100644
index 000000000000..d65bec1fc687
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S
@@ -0,0 +1,135 @@
+//===-- xray_trampoline_mips64.s --------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the MIPS64-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+ .file "xray_trampoline_mips64.S"
+ .globl __xray_FunctionEntry
+ .p2align 2
+ .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+ .cfi_startproc
+ // Save argument registers before doing any actual work.
+ .cfi_def_cfa_offset 144
+ daddiu $sp, $sp, -144
+ sd $ra, 136($sp)
+ .cfi_offset 31, -8
+ sd $gp, 128($sp)
+ sd $a7, 120($sp)
+ sd $a6, 112($sp)
+ sd $a5, 104($sp)
+ sd $a4, 96($sp)
+ sd $a3, 88($sp)
+ sd $a2, 80($sp)
+ sd $a1, 72($sp)
+ sd $a0, 64($sp)
+ sdc1 $f19, 56($sp)
+ sdc1 $f18, 48($sp)
+ sdc1 $f17, 40($sp)
+ sdc1 $f16, 32($sp)
+ sdc1 $f15, 24($sp)
+ sdc1 $f14, 16($sp)
+ sdc1 $f13, 8($sp)
+ sdc1 $f12, 0($sp)
+
+ lui $gp, %hi(%neg(%gp_rel(__xray_FunctionEntry)))
+ daddu $gp, $gp, $t9
+  daddiu $gp, $gp, %lo(%neg(%gp_rel(__xray_FunctionEntry)))
+
+ dla $t9, _ZN6__xray19XRayPatchedFunctionE
+ ld $t9, 0($t9)
+
+ beqz $t9, FunctionEntry_restore
+
+ // a1=0 means that we are tracing an entry event
+ move $a1, $zero
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionEntry_restore:
+ // Restore argument registers
+ ldc1 $f12, 0($sp)
+ ldc1 $f13, 8($sp)
+ ldc1 $f14, 16($sp)
+ ldc1 $f15, 24($sp)
+ ldc1 $f16, 32($sp)
+ ldc1 $f17, 40($sp)
+ ldc1 $f18, 48($sp)
+ ldc1 $f19, 56($sp)
+ ld $a0, 64($sp)
+ ld $a1, 72($sp)
+ ld $a2, 80($sp)
+ ld $a3, 88($sp)
+ ld $a4, 96($sp)
+ ld $a5, 104($sp)
+ ld $a6, 112($sp)
+ ld $a7, 120($sp)
+ ld $gp, 128($sp)
+ ld $ra, 136($sp)
+ daddiu $sp, $sp, 144
+ jr $ra
+FunctionEntry_end:
+ .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry
+ .cfi_endproc
+
+ .text
+ .globl __xray_FunctionExit
+ .p2align 2
+ .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+ .cfi_startproc
+ // Save return registers before doing any actual work.
+ .cfi_def_cfa_offset 64
+ daddiu $sp, $sp, -64
+ sd $ra, 56($sp)
+ .cfi_offset 31, -8
+ sd $gp, 48($sp)
+ sd $a0, 40($sp)
+ sd $v1, 32($sp)
+ sd $v0, 24($sp)
+ sdc1 $f2, 16($sp)
+ sdc1 $f1, 8($sp)
+ sdc1 $f0, 0($sp)
+
+ lui $gp, %hi(%neg(%gp_rel(__xray_FunctionExit)))
+ daddu $gp, $gp, $t9
+  daddiu $gp, $gp, %lo(%neg(%gp_rel(__xray_FunctionExit)))
+
+ dla $t9, _ZN6__xray19XRayPatchedFunctionE
+ ld $t9, 0($t9)
+
+ beqz $t9, FunctionExit_restore
+
+ // a1=1 means that we are tracing an exit event
+ li $a1, 1
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionExit_restore:
+ // Restore return registers
+ ldc1 $f0, 0($sp)
+ ldc1 $f1, 8($sp)
+ ldc1 $f2, 16($sp)
+ ld $v0, 24($sp)
+ ld $v1, 32($sp)
+ ld $a0, 40($sp)
+ ld $gp, 48($sp)
+ ld $ra, 56($sp)
+ daddiu $sp, $sp, 64
+ jr $ra
+
+FunctionExit_end:
+ .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit
+ .cfi_endproc
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp
new file mode 100644
index 000000000000..878c46930fee
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp
@@ -0,0 +1,15 @@
+#include <atomic>
+#include <xray/xray_interface.h>
+
+namespace __xray {
+
+extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction;
+
+// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand.
+void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) {
+ auto fptr = __xray::XRayPatchedFunction.load();
+ if (fptr != nullptr)
+ (*fptr)(FuncId, Type);
+}
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S
new file mode 100644
index 000000000000..250e2e5be67a
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S
@@ -0,0 +1,235 @@
+ .text
+ .abiversion 2
+ .globl __xray_FunctionEntry
+ .p2align 4
+__xray_FunctionEntry:
+ std 0, 16(1)
+ stdu 1, -408(1)
+# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers.
+# If this appears to be slow, the caller needs to pass in number of generic,
+# floating point, and vector parameters, so that we only spill those live ones.
+ std 3, 32(1)
+ ld 3, 400(1) # FuncId
+ std 4, 40(1)
+ std 5, 48(1)
+ std 6, 56(1)
+ std 7, 64(1)
+ std 8, 72(1)
+ std 9, 80(1)
+ std 10, 88(1)
+ addi 4, 1, 96
+ stxsdx 1, 0, 4
+ addi 4, 1, 104
+ stxsdx 2, 0, 4
+ addi 4, 1, 112
+ stxsdx 3, 0, 4
+ addi 4, 1, 120
+ stxsdx 4, 0, 4
+ addi 4, 1, 128
+ stxsdx 5, 0, 4
+ addi 4, 1, 136
+ stxsdx 6, 0, 4
+ addi 4, 1, 144
+ stxsdx 7, 0, 4
+ addi 4, 1, 152
+ stxsdx 8, 0, 4
+ addi 4, 1, 160
+ stxsdx 9, 0, 4
+ addi 4, 1, 168
+ stxsdx 10, 0, 4
+ addi 4, 1, 176
+ stxsdx 11, 0, 4
+ addi 4, 1, 184
+ stxsdx 12, 0, 4
+ addi 4, 1, 192
+ stxsdx 13, 0, 4
+ addi 4, 1, 200
+ stxvd2x 34, 0, 4
+ addi 4, 1, 216
+ stxvd2x 35, 0, 4
+ addi 4, 1, 232
+ stxvd2x 36, 0, 4
+ addi 4, 1, 248
+ stxvd2x 37, 0, 4
+ addi 4, 1, 264
+ stxvd2x 38, 0, 4
+ addi 4, 1, 280
+ stxvd2x 39, 0, 4
+ addi 4, 1, 296
+ stxvd2x 40, 0, 4
+ addi 4, 1, 312
+ stxvd2x 41, 0, 4
+ addi 4, 1, 328
+ stxvd2x 42, 0, 4
+ addi 4, 1, 344
+ stxvd2x 43, 0, 4
+ addi 4, 1, 360
+ stxvd2x 44, 0, 4
+ addi 4, 1, 376
+ stxvd2x 45, 0, 4
+ std 2, 392(1)
+ mflr 0
+ std 0, 400(1)
+
+ li 4, 0
+ bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
+ nop
+
+ addi 4, 1, 96
+ lxsdx 1, 0, 4
+ addi 4, 1, 104
+ lxsdx 2, 0, 4
+ addi 4, 1, 112
+ lxsdx 3, 0, 4
+ addi 4, 1, 120
+ lxsdx 4, 0, 4
+ addi 4, 1, 128
+ lxsdx 5, 0, 4
+ addi 4, 1, 136
+ lxsdx 6, 0, 4
+ addi 4, 1, 144
+ lxsdx 7, 0, 4
+ addi 4, 1, 152
+ lxsdx 8, 0, 4
+ addi 4, 1, 160
+ lxsdx 9, 0, 4
+ addi 4, 1, 168
+ lxsdx 10, 0, 4
+ addi 4, 1, 176
+ lxsdx 11, 0, 4
+ addi 4, 1, 184
+ lxsdx 12, 0, 4
+ addi 4, 1, 192
+ lxsdx 13, 0, 4
+ addi 4, 1, 200
+ lxvd2x 34, 0, 4
+ addi 4, 1, 216
+ lxvd2x 35, 0, 4
+ addi 4, 1, 232
+ lxvd2x 36, 0, 4
+ addi 4, 1, 248
+ lxvd2x 37, 0, 4
+ addi 4, 1, 264
+ lxvd2x 38, 0, 4
+ addi 4, 1, 280
+ lxvd2x 39, 0, 4
+ addi 4, 1, 296
+ lxvd2x 40, 0, 4
+ addi 4, 1, 312
+ lxvd2x 41, 0, 4
+ addi 4, 1, 328
+ lxvd2x 42, 0, 4
+ addi 4, 1, 344
+ lxvd2x 43, 0, 4
+ addi 4, 1, 360
+ lxvd2x 44, 0, 4
+ addi 4, 1, 376
+ lxvd2x 45, 0, 4
+ ld 0, 400(1)
+ mtlr 0
+ ld 2, 392(1)
+ ld 3, 32(1)
+ ld 4, 40(1)
+ ld 5, 48(1)
+ ld 6, 56(1)
+ ld 7, 64(1)
+ ld 8, 72(1)
+ ld 9, 80(1)
+ ld 10, 88(1)
+
+ addi 1, 1, 408
+ ld 0, 16(1)
+ blr
+
+ .globl __xray_FunctionExit
+ .p2align 4
+__xray_FunctionExit:
+ std 0, 16(1)
+ stdu 1, -256(1)
+# Spill r3-r4, f1-f8, and vsr34-vsr41, which are the return registers.
+# If this appears to be slow, the caller needs to pass in the number of
+# general-purpose, floating-point, and vector parameters, so that we only
+# spill the live ones.
+ std 3, 32(1)
+ ld 3, 248(1) # FuncId
+ std 4, 40(1)
+ addi 4, 1, 48
+ stxsdx 1, 0, 4
+ addi 4, 1, 56
+ stxsdx 2, 0, 4
+ addi 4, 1, 64
+ stxsdx 3, 0, 4
+ addi 4, 1, 72
+ stxsdx 4, 0, 4
+ addi 4, 1, 80
+ stxsdx 5, 0, 4
+ addi 4, 1, 88
+ stxsdx 6, 0, 4
+ addi 4, 1, 96
+ stxsdx 7, 0, 4
+ addi 4, 1, 104
+ stxsdx 8, 0, 4
+ addi 4, 1, 112
+ stxvd2x 34, 0, 4
+ addi 4, 1, 128
+ stxvd2x 35, 0, 4
+ addi 4, 1, 144
+ stxvd2x 36, 0, 4
+ addi 4, 1, 160
+ stxvd2x 37, 0, 4
+ addi 4, 1, 176
+ stxvd2x 38, 0, 4
+ addi 4, 1, 192
+ stxvd2x 39, 0, 4
+ addi 4, 1, 208
+ stxvd2x 40, 0, 4
+ addi 4, 1, 224
+ stxvd2x 41, 0, 4
+ std 2, 240(1)
+ mflr 0
+ std 0, 248(1)
+
+ li 4, 1
+ bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
+ nop
+
+ addi 4, 1, 48
+ lxsdx 1, 0, 4
+ addi 4, 1, 56
+ lxsdx 2, 0, 4
+ addi 4, 1, 64
+ lxsdx 3, 0, 4
+ addi 4, 1, 72
+ lxsdx 4, 0, 4
+ addi 4, 1, 80
+ lxsdx 5, 0, 4
+ addi 4, 1, 88
+ lxsdx 6, 0, 4
+ addi 4, 1, 96
+ lxsdx 7, 0, 4
+ addi 4, 1, 104
+ lxsdx 8, 0, 4
+ addi 4, 1, 112
+ lxvd2x 34, 0, 4
+ addi 4, 1, 128
+ lxvd2x 35, 0, 4
+ addi 4, 1, 144
+ lxvd2x 36, 0, 4
+ addi 4, 1, 160
+ lxvd2x 37, 0, 4
+ addi 4, 1, 176
+ lxvd2x 38, 0, 4
+ addi 4, 1, 192
+ lxvd2x 39, 0, 4
+ addi 4, 1, 208
+ lxvd2x 40, 0, 4
+ addi 4, 1, 224
+ lxvd2x 41, 0, 4
+ ld 0, 248(1)
+ mtlr 0
+ ld 2, 240(1)
+ ld 3, 32(1)
+ ld 4, 40(1)
+
+ addi 1, 1, 256
+ ld 0, 16(1)
+ blr
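The frame sizes above follow directly from the ELFv2 layout: the 32-byte fixed header, the spill slots at the offsets used by the stores, then one doubleword each for the TOC and LR saves. A small illustrative check (not part of the patch):

// Entry trampoline: header + r3-r10 + f1-f13 + vs34-vs45 + TOC + LR.
constexpr unsigned EntryFrame = 32 + 8 * 8 + 13 * 8 + 12 * 16 + 8 + 8;
static_assert(EntryFrame == 408, "matches 'stdu 1, -408(1)'");

// Exit trampoline: header + r3-r4 + f1-f8 + vs34-vs41 + TOC + LR.
constexpr unsigned ExitFrame = 32 + 2 * 8 + 8 * 8 + 8 * 16 + 8 + 8;
static_assert(ExitFrame == 256, "matches 'stdu 1, -256(1)'");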
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S
new file mode 100644
index 000000000000..12c5a6ccd9a4
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -0,0 +1,289 @@
+//===-- xray_trampoline_x86_64.S --------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the X86-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../builtins/assembly.h"
+#include "../sanitizer_common/sanitizer_asm.h"
+
+
+
+.macro SAVE_REGISTERS
+ pushfq
+ subq $240, %rsp
+ CFI_DEF_CFA_OFFSET(248)
+ movq %rbp, 232(%rsp)
+ movupd %xmm0, 216(%rsp)
+ movupd %xmm1, 200(%rsp)
+ movupd %xmm2, 184(%rsp)
+ movupd %xmm3, 168(%rsp)
+ movupd %xmm4, 152(%rsp)
+ movupd %xmm5, 136(%rsp)
+ movupd %xmm6, 120(%rsp)
+ movupd %xmm7, 104(%rsp)
+ movq %rdi, 96(%rsp)
+ movq %rax, 88(%rsp)
+ movq %rdx, 80(%rsp)
+ movq %rsi, 72(%rsp)
+ movq %rcx, 64(%rsp)
+ movq %r8, 56(%rsp)
+ movq %r9, 48(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 32(%rsp)
+ movq %r12, 24(%rsp)
+ movq %r13, 16(%rsp)
+ movq %r14, 8(%rsp)
+ movq %r15, 0(%rsp)
+.endm
+
+.macro RESTORE_REGISTERS
+ movq 232(%rsp), %rbp
+ movupd 216(%rsp), %xmm0
+ movupd 200(%rsp), %xmm1
+ movupd 184(%rsp), %xmm2
+ movupd 168(%rsp), %xmm3
+ movupd 152(%rsp), %xmm4
+ movupd 136(%rsp), %xmm5
+ movupd 120(%rsp), %xmm6
+ movupd 104(%rsp), %xmm7
+ movq 96(%rsp), %rdi
+ movq 88(%rsp), %rax
+ movq 80(%rsp), %rdx
+ movq 72(%rsp), %rsi
+ movq 64(%rsp), %rcx
+ movq 56(%rsp), %r8
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r10
+ movq 32(%rsp), %r11
+ movq 24(%rsp), %r12
+ movq 16(%rsp), %r13
+ movq 8(%rsp), %r14
+ movq 0(%rsp), %r15
+ addq $240, %rsp
+ popfq
+ CFI_DEF_CFA_OFFSET(8)
+.endm
+
+.macro ALIGNED_CALL_RAX
+ // Call the logging handler, after aligning the stack to a 16-byte boundary.
+ // The approach we take here uses additional stack space to stash the stack
+ // pointer twice before aligning it to 16 bytes. If the stack was 8-byte
+ // aligned, it becomes 16-byte aligned; when restoring the pointer after the
+ // call, one of the stashed copies is always available at 8(%rsp).
+ pushq %rsp
+ pushq (%rsp)
+ andq $-0x10, %rsp
+ callq *%rax
+ movq 8(%rsp), %rsp
+.endm
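+// An illustrative walk-through (not part of the macro): suppose %rsp is
+// 0x1008 on entry, i.e. 8-byte but not 16-byte aligned. `pushq %rsp` stores
+// 0x1008 at 0x1000 and leaves %rsp at 0x1000; `pushq (%rsp)` stores a second
+// copy of 0x1008 at 0xff8 and leaves %rsp at 0xff8; `andq $-0x10, %rsp`
+// lands on 0xff0. Both 0xff8 and 0x1000 now hold the original value, so
+// after the call returns, `movq 8(%rsp), %rsp` always recovers 0x1008,
+// whichever alignment case we started in.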
+
+ .text
+#if !defined(__APPLE__)
+ .section .text
+ .file "xray_trampoline_x86.S"
+#else
+ .section __TEXT,__text
+#endif
+
+//===----------------------------------------------------------------------===//
+
+ .globl ASM_SYMBOL(__xray_FunctionEntry)
+ ASM_HIDDEN(__xray_FunctionEntry)
+ .align 16, 0x90
+ ASM_TYPE_FUNCTION(__xray_FunctionEntry)
+# LLVM-MCA-BEGIN __xray_FunctionEntry
+ASM_SYMBOL(__xray_FunctionEntry):
+ CFI_STARTPROC
+ SAVE_REGISTERS
+
+ // This load has to be atomic; it is concurrent with __xray_patch().
+ // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+ testq %rax, %rax
+ je .Ltmp0
+
+ // The patched function prologue puts its xray_instr_map index into %r10d.
+ movl %r10d, %edi
+ xor %esi, %esi
+ ALIGNED_CALL_RAX
+
+.Ltmp0:
+ RESTORE_REGISTERS
+ retq
+# LLVM-MCA-END
+ ASM_SIZE(__xray_FunctionEntry)
+ CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+ .globl ASM_SYMBOL(__xray_FunctionExit)
+ ASM_HIDDEN(__xray_FunctionExit)
+ .align 16, 0x90
+ ASM_TYPE_FUNCTION(__xray_FunctionExit)
+# LLVM-MCA-BEGIN __xray_FunctionExit
+ASM_SYMBOL(__xray_FunctionExit):
+ CFI_STARTPROC
+ // Save the important registers first. Since we're assuming that this
+ // function is only jumped into, we only preserve the registers for
+ // returning.
+ subq $56, %rsp
+ CFI_DEF_CFA_OFFSET(64)
+ movq %rbp, 48(%rsp)
+ movupd %xmm0, 32(%rsp)
+ movupd %xmm1, 16(%rsp)
+ movq %rax, 8(%rsp)
+ movq %rdx, 0(%rsp)
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+ testq %rax, %rax
+ je .Ltmp2
+
+ movl %r10d, %edi
+ movl $1, %esi
+ ALIGNED_CALL_RAX
+
+.Ltmp2:
+ // Restore the important registers.
+ movq 48(%rsp), %rbp
+ movupd 32(%rsp), %xmm0
+ movupd 16(%rsp), %xmm1
+ movq 8(%rsp), %rax
+ movq 0(%rsp), %rdx
+ addq $56, %rsp
+ CFI_DEF_CFA_OFFSET(8)
+ retq
+# LLVM-MCA-END
+ ASM_SIZE(__xray_FunctionExit)
+ CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+ .globl ASM_SYMBOL(__xray_FunctionTailExit)
+ ASM_HIDDEN(__xray_FunctionTailExit)
+ .align 16, 0x90
+ ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
+# LLVM-MCA-BEGIN __xray_FunctionTailExit
+ASM_SYMBOL(__xray_FunctionTailExit):
+ CFI_STARTPROC
+ SAVE_REGISTERS
+
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+ testq %rax, %rax
+ je .Ltmp4
+
+ movl %r10d, %edi
+ movl $2, %esi
+
+ ALIGNED_CALL_RAX
+
+.Ltmp4:
+ RESTORE_REGISTERS
+ retq
+# LLVM-MCA-END
+ ASM_SIZE(__xray_FunctionTailExit)
+ CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+ .globl ASM_SYMBOL(__xray_ArgLoggerEntry)
+ ASM_HIDDEN(__xray_ArgLoggerEntry)
+ .align 16, 0x90
+ ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry)
+# LLVM-MCA-BEGIN __xray_ArgLoggerEntry
+ASM_SYMBOL(__xray_ArgLoggerEntry):
+ CFI_STARTPROC
+ SAVE_REGISTERS
+
+ // Again, these function pointer loads must be atomic; MOV is fine.
+ movq ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
+ testq %rax, %rax
+ jne .Larg1entryLog
+
+ // If the arg1 logging handler is not set, defer to the no-argument logging handler.
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+ testq %rax, %rax
+ je .Larg1entryFail
+
+.Larg1entryLog:
+
+ // First argument will become the third
+ movq %rdi, %rdx
+
+ // XRayEntryType::LOG_ARGS_ENTRY into the second
+ mov $0x3, %esi
+
+ // 32-bit function ID becomes the first
+ movl %r10d, %edi
+ ALIGNED_CALL_RAX
+
+.Larg1entryFail:
+ RESTORE_REGISTERS
+ retq
+# LLVM-MCA-END
+ ASM_SIZE(__xray_ArgLoggerEntry)
+ CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+ .global ASM_SYMBOL(__xray_CustomEvent)
+ ASM_HIDDEN(__xray_CustomEvent)
+ .align 16, 0x90
+ ASM_TYPE_FUNCTION(__xray_CustomEvent)
+# LLVM-MCA-BEGIN __xray_CustomEvent
+ASM_SYMBOL(__xray_CustomEvent):
+ CFI_STARTPROC
+ SAVE_REGISTERS
+
+ // We take two arguments to this trampoline, which should be in rdi and rsi
+ // already.
+ movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
+ testq %rax, %rax
+ je .LcustomEventCleanup
+
+ ALIGNED_CALL_RAX
+
+.LcustomEventCleanup:
+ RESTORE_REGISTERS
+ retq
+# LLVM-MCA-END
+ ASM_SIZE(__xray_CustomEvent)
+ CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+ .global ASM_SYMBOL(__xray_TypedEvent)
+ ASM_HIDDEN(__xray_TypedEvent)
+ .align 16, 0x90
+ ASM_TYPE_FUNCTION(__xray_TypedEvent)
+# LLVM-MCA-BEGIN __xray_TypedEvent
+ASM_SYMBOL(__xray_TypedEvent):
+ CFI_STARTPROC
+ SAVE_REGISTERS
+
+ // We pass three arguments to this trampoline, which should be in rdi, rsi
+ // and rdx without our intervention.
+ movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+ testq %rax, %rax
+ je .LtypedEventCleanup
+
+ ALIGNED_CALL_RAX
+
+.LtypedEventCleanup:
+ RESTORE_REGISTERS
+ retq
+# LLVM-MCA-END
+ ASM_SIZE(__xray_TypedEvent)
+ CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+NO_EXEC_STACK_DIRECTIVE
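The two event trampolines above dispatch to handler slots with different signatures. A hedged usage sketch of the public registration call (from xray/xray_interface.h) that populates the custom-event slot:

#include <cstddef>
#include <cstdio>
#include <xray/xray_interface.h>

// __xray_CustomEvent forwards the (pointer, size) pairs that
// __xray_customevent(...) emits in instrumented code.
static void OnCustomEvent(void *Data, std::size_t Size) {
  (void)Data;
  std::printf("custom event: %zu bytes\n", Size);
}

int main() {
  __xray_set_customevent_handler(OnCustomEvent);
  // ... run patched, instrumented code that calls __xray_customevent ...
  __xray_remove_customevent_handler();
}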
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h
new file mode 100644
index 000000000000..bd7e1911abb3
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h
@@ -0,0 +1,90 @@
+//===-- xray_tsc.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_EMULATE_TSC_H
+#define XRAY_EMULATE_TSC_H
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __xray {
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+}
+
+#if SANITIZER_FUCHSIA
+#include <zircon/syscalls.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ CPU = 0;
+ return _zx_ticks_get();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ return _zx_ticks_per_second();
+}
+
+} // namespace __xray
+
+#else // SANITIZER_FUCHSIA
+
+#if defined(__x86_64__)
+#include "xray_x86_64.inc"
+#elif defined(__powerpc64__)
+#include "xray_powerpc64.inc"
+#elif defined(__arm__) || defined(__aarch64__) || defined(__mips__)
+// Emulated TSC.
+// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+// not have a constant frequency like the TSC on x86(_64); it may go faster
+// or slower depending on CPU turbo or power-saving modes. Furthermore,
+// reading from CP15 on ARM requires a kernel modification or a driver,
+// which we cannot require from users of compiler-rt.
+// So on ARM we use clock_gettime(), which gives the result in nanoseconds.
+// To keep the measurements per second, we scale by the number of
+// nanoseconds per second, pretending that the TSC frequency is 1GHz and
+// one TSC tick is 1 nanosecond.
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+ TS.tv_sec = 0;
+ TS.tv_nsec = 0;
+ }
+ CPU = 0;
+ return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ return NanosecondsPerSecond;
+}
+
+} // namespace __xray
+
+#else
+#error Target architecture is not supported.
+#endif // CPU architecture
+#endif // SANITIZER_FUCHSIA
+
+#endif // XRAY_EMULATE_TSC_H
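On the emulated-TSC targets above, getTSCFrequency() returns NanosecondsPerSecond, so a tick delta is already a nanosecond count; on real-TSC targets the same division applies with the hardware frequency. A hedged sketch of the conversion (ticksToNanos is a hypothetical helper, not part of the runtime):

#include <cstdint>

// Convert a TSC delta to nanoseconds given the reported frequency. With the
// emulated TSC (Frequency == 1e9) this reduces to the identity. Note the
// multiply can overflow for very large deltas; a real implementation would
// split the division.
inline uint64_t ticksToNanos(uint64_t DeltaTicks, uint64_t Frequency) {
  return DeltaTicks * 1000000000ull / Frequency;
}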
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp
new file mode 100644
index 000000000000..4c8ad5b92be7
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp
@@ -0,0 +1,199 @@
+//===-- xray_utils.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_utils.h"
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_allocator.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include <cstdio>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <tuple>
+#include <unistd.h>
+#include <utility>
+
+#if SANITIZER_FUCHSIA
+#include "sanitizer_common/sanitizer_symbolizer_fuchsia.h"
+
+#include <inttypes.h>
+#include <zircon/process.h>
+#include <zircon/sanitizer.h>
+#include <zircon/status.h>
+#include <zircon/syscalls.h>
+#endif
+
+namespace __xray {
+
+#if SANITIZER_FUCHSIA
+constexpr const char* ProfileSinkName = "llvm-xray";
+
+LogWriter::~LogWriter() {
+ _zx_handle_close(Vmo);
+}
+
+void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT {
+ if (Begin == End)
+ return;
+ auto TotalBytes = std::distance(Begin, End);
+
+ const size_t PageSize = flags()->xray_page_size_override > 0
+ ? flags()->xray_page_size_override
+ : GetPageSizeCached();
+ if (RoundUpTo(Offset, PageSize) != RoundUpTo(Offset + TotalBytes, PageSize)) {
+ // Resize the VMO to ensure there's sufficient space for the data.
+ zx_status_t Status = _zx_vmo_set_size(Vmo, Offset + TotalBytes);
+ if (Status != ZX_OK) {
+ Report("Failed to resize VMO: %s\n", _zx_status_get_string(Status));
+ return;
+ }
+ }
+
+ // Write the data into VMO.
+ zx_status_t Status = _zx_vmo_write(Vmo, Begin, Offset, TotalBytes);
+ if (Status != ZX_OK) {
+ Report("Failed to write: %s\n", _zx_status_get_string(Status));
+ return;
+ }
+ Offset += TotalBytes;
+
+ // Record the data size as a property of the VMO.
+ _zx_object_set_property(Vmo, ZX_PROP_VMO_CONTENT_SIZE,
+ &Offset, sizeof(Offset));
+}
+
+void LogWriter::Flush() XRAY_NEVER_INSTRUMENT {
+ // Nothing to do here since WriteAll writes directly into the VMO.
+}
+
+LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT {
+ // Create VMO to hold the profile data.
+ zx_handle_t Vmo;
+ zx_status_t Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo);
+ if (Status != ZX_OK) {
+ Report("XRay: cannot create VMO: %s\n", _zx_status_get_string(Status));
+ return nullptr;
+ }
+
+ // Get the KOID of the current process to use in the VMO name.
+ zx_info_handle_basic_t Info;
+ Status = _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info,
+ sizeof(Info), NULL, NULL);
+ if (Status != ZX_OK) {
+ Report("XRay: cannot get basic info about current process handle: %s\n",
+ _zx_status_get_string(Status));
+ return nullptr;
+ }
+
+ // Give the VMO a name including our process KOID so it's easy to spot.
+ char VmoName[ZX_MAX_NAME_LEN];
+ internal_snprintf(VmoName, sizeof(VmoName), "%s.%zu", ProfileSinkName,
+ Info.koid);
+ _zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName));
+
+ // Duplicate the handle since __sanitizer_publish_data consumes it and
+ // LogWriter needs to hold onto it.
+ zx_handle_t Handle;
+ Status = _zx_handle_duplicate(Vmo, ZX_RIGHT_SAME_RIGHTS, &Handle);
+ if (Status != ZX_OK) {
+ Report("XRay: cannot duplicate VMO handle: %s\n",
+ _zx_status_get_string(Status));
+ return nullptr;
+ }
+
+ // Publish the VMO that receives the logging. Note the VMO's contents can
+ // grow and change after publication. The contents won't be read out until
+ // after the process exits.
+ __sanitizer_publish_data(ProfileSinkName, Handle);
+
+ // Use the dumpfile symbolizer markup element to write the name of the VMO.
+ Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName);
+
+ LogWriter *LW = reinterpret_cast<LogWriter *>(InternalAlloc(sizeof(LogWriter)));
+ new (LW) LogWriter(Vmo);
+ return LW;
+}
+
+void LogWriter::Close(LogWriter *LW) {
+ LW->~LogWriter();
+ InternalFree(LW);
+}
+#else // SANITIZER_FUCHSIA
+LogWriter::~LogWriter() {
+ internal_close(Fd);
+}
+
+void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT {
+ if (Begin == End)
+ return;
+ auto TotalBytes = std::distance(Begin, End);
+ while (auto Written = write(Fd, Begin, TotalBytes)) {
+ if (Written < 0) {
+ if (errno == EINTR)
+ continue; // Try again.
+ Report("Failed to write; errno = %d\n", errno);
+ return;
+ }
+ TotalBytes -= Written;
+ if (TotalBytes == 0)
+ break;
+ Begin += Written;
+ }
+}
+
+void LogWriter::Flush() XRAY_NEVER_INSTRUMENT {
+ fsync(Fd);
+}
+
+LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT {
+ // Open a temporary file once for the log.
+ char TmpFilename[256] = {};
+ char TmpWildcardPattern[] = "XXXXXX";
+ auto **Argv = GetArgv();
+ const char *Progname = !Argv ? "(unknown)" : Argv[0];
+ const char *LastSlash = internal_strrchr(Progname, '/');
+
+ if (LastSlash != nullptr)
+ Progname = LastSlash + 1;
+
+ int NeededLength = internal_snprintf(
+ TmpFilename, sizeof(TmpFilename), "%s%s.%s",
+ flags()->xray_logfile_base, Progname, TmpWildcardPattern);
+ if (NeededLength >= int(sizeof(TmpFilename))) {
+ Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename);
+ return nullptr;
+ }
+ int Fd = mkstemp(TmpFilename);
+ if (Fd == -1) {
+ Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
+ TmpFilename);
+ return nullptr;
+ }
+ if (Verbosity())
+ Report("XRay: Log file in '%s'\n", TmpFilename);
+
+ LogWriter *LW = allocate<LogWriter>();
+ new (LW) LogWriter(Fd);
+ return LW;
+}
+
+void LogWriter::Close(LogWriter *LW) {
+ LW->~LogWriter();
+ deallocate(LW);
+}
+#endif // SANITIZER_FUCHSIA
+
+} // namespace __xray
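A minimal usage sketch of the LogWriter interface defined here (hypothetical caller; assumes the XRay flags have been initialized so flags()->xray_logfile_base is meaningful):

using namespace __xray;

void writeHeaderExample() {
  LogWriter *LW = LogWriter::Open(); // nullptr on failure, already Report()ed.
  if (LW == nullptr)
    return;
  static const char Header[] = "example header";
  LW->WriteAll(Header, Header + sizeof(Header) - 1);
  LW->Flush();
  LogWriter::Close(LW); // Destroys and deallocates the writer.
}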
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h
new file mode 100644
index 000000000000..333826168c0d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h
@@ -0,0 +1,85 @@
+//===-- xray_utils.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Some shared utilities for the XRay runtime implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_UTILS_H
+#define XRAY_UTILS_H
+
+#include <cstddef>
+#include <cstdint>
+#include <sys/types.h>
+#include <utility>
+
+#include "sanitizer_common/sanitizer_common.h"
+#if SANITIZER_FUCHSIA
+#include <zircon/types.h>
+#endif
+
+namespace __xray {
+
+class LogWriter {
+public:
+#if SANITIZER_FUCHSIA
+ LogWriter(zx_handle_t Vmo) : Vmo(Vmo) {}
+#else
+ explicit LogWriter(int Fd) : Fd(Fd) {}
+#endif
+ ~LogWriter();
+
+ // Write a character range into a log.
+ void WriteAll(const char *Begin, const char *End);
+
+ void Flush();
+
+ // Returns a new log instance initialized using the flag-provided values.
+ static LogWriter *Open();
+ // Closes and deallocates the log instance.
+ static void Close(LogWriter *LogWriter);
+
+private:
+#if SANITIZER_FUCHSIA
+ zx_handle_t Vmo = ZX_HANDLE_INVALID;
+ uint64_t Offset = 0;
+#else
+ int Fd = -1;
+#endif
+};
+
+constexpr size_t gcd(size_t a, size_t b) {
+ return (b == 0) ? a : gcd(b, a % b);
+}
+
+constexpr size_t lcm(size_t a, size_t b) { return a * b / gcd(a, b); }
+
+constexpr size_t nearest_boundary(size_t number, size_t multiple) {
+ return multiple * ((number / multiple) + (number % multiple ? 1 : 0));
+}
+
+constexpr size_t next_pow2_helper(size_t num, size_t acc) {
+ return (1u << acc) >= num ? (1u << acc) : next_pow2_helper(num, acc + 1);
+}
+
+constexpr size_t next_pow2(size_t number) {
+ return next_pow2_helper(number, 1);
+}
+
+template <class T> constexpr T &max(T &A, T &B) { return A > B ? A : B; }
+
+template <class T> constexpr T &min(T &A, T &B) { return A <= B ? A : B; }
+
+constexpr ptrdiff_t diff(uintptr_t A, uintptr_t B) {
+ return max(A, B) - min(A, B);
+}
+
+} // namespace __xray
+
+#endif // XRAY_UTILS_H
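Illustrative values for the constexpr helpers above (note that next_pow2 starts probing at 2, so next_pow2(1) is 2 rather than 1):

static_assert(__xray::gcd(12, 18) == 6, "");
static_assert(__xray::lcm(4, 6) == 12, "");
static_assert(__xray::nearest_boundary(10, 8) == 16, "");
static_assert(__xray::nearest_boundary(16, 8) == 16, "");
static_assert(__xray::next_pow2(1) == 2, "");
static_assert(__xray::next_pow2(9) == 16, "");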
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp
new file mode 100644
index 000000000000..f3742ac71290
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -0,0 +1,359 @@
+#include "cpuid.h"
+#include "sanitizer_common/sanitizer_common.h"
+#if !SANITIZER_FUCHSIA
+#include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
+#include <sys/types.h>
+#if SANITIZER_OPENBSD
+#include <sys/time.h>
+#include <machine/cpu.h>
+#endif
+#include <sys/sysctl.h>
+#elif SANITIZER_FUCHSIA
+#include <zircon/syscalls.h>
+#endif
+
+#include <atomic>
+#include <cstdint>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <unistd.h>
+
+namespace __xray {
+
+#if SANITIZER_LINUX
+static std::pair<ssize_t, bool>
+retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+ auto BytesToRead = std::distance(Begin, End);
+ ssize_t BytesRead;
+ ssize_t TotalBytesRead = 0;
+ while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+ if (BytesRead == -1) {
+ if (errno == EINTR)
+ continue;
+ Report("Read error; errno = %d\n", errno);
+ return std::make_pair(TotalBytesRead, false);
+ }
+
+ TotalBytesRead += BytesRead;
+ BytesToRead -= BytesRead;
+ Begin += BytesRead;
+ }
+ return std::make_pair(TotalBytesRead, true);
+}
+
+static bool readValueFromFile(const char *Filename,
+ long long *Value) XRAY_NEVER_INSTRUMENT {
+ int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+ if (Fd == -1)
+ return false;
+ static constexpr size_t BufSize = 256;
+ char Line[BufSize] = {};
+ ssize_t BytesRead;
+ bool Success;
+ std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+ close(Fd);
+ if (!Success)
+ return false;
+ const char *End = nullptr;
+ long long Tmp = internal_simple_strtoll(Line, &End, 10);
+ bool Result = false;
+ if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+ *Value = Tmp;
+ Result = true;
+ }
+ return Result;
+}
+
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ long long TSCFrequency = -1;
+ if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+ &TSCFrequency)) {
+ TSCFrequency *= 1000;
+ } else if (readValueFromFile(
+ "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &TSCFrequency)) {
+ TSCFrequency *= 1000;
+ } else {
+ Report("Unable to determine CPU frequency for TSC accounting.\n");
+ }
+ return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
+}
+#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ long long TSCFrequency = -1;
+ size_t tscfreqsz = sizeof(TSCFrequency);
+#if SANITIZER_OPENBSD
+ int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
+ if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
+#elif SANITIZER_MAC
+ if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
+ &tscfreqsz, NULL, 0) != -1) {
+#else
+ if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
+ NULL, 0) != -1) {
+#endif
+ return static_cast<uint64_t>(TSCFrequency);
+ } else {
+ Report("Unable to determine CPU frequency for TSC accounting.\n");
+ }
+
+ return 0;
+}
+#elif !SANITIZER_FUCHSIA
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ /* Not supported */
+ return 0;
+}
+#endif
+
+static constexpr uint8_t CallOpCode = 0xe8;
+static constexpr uint16_t MovR10Seq = 0xba41;
+static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
+static constexpr uint8_t JmpOpCode = 0xe9;
+static constexpr uint8_t RetOpCode = 0xc3;
+static constexpr uint16_t NopwSeq = 0x9066;
+
+static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +9
+ // <9 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // call <relative 32-bit offset to entry trampoline>
+ //
+ // We need to do this in the following order:
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 2-byte jmp instruction).
+ // 2. Put the call opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // The prerequisite is to compute the relative offset to the trampoline's address.
+ const uint64_t Address = Sled.address();
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+ (static_cast<int64_t>(Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline,
+ reinterpret_cast<void *>(Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
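+// Illustrative layout of the 11 bytes being patched (values in hex; the
+// stores are little-endian, so the uint16_t 0xba41 lands as the bytes 41 ba):
+//
+//   offset:   0  1  2  3  4  5  6  7  8  9  10
+//   disabled: eb 09 <------- 9-byte nop sled ------->
+//   enabled:  41 ba <FuncId le32> e8 <rel32 offset>
+//
+// The call's rel32 is relative to the next instruction, Address + 11, which
+// is why that constant appears in the TrampolineOffset computation above.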
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // ret
+ // <10 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // jmp <relative 32-bit offset to exit trampoline>
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 1-byte ret instruction).
+ // 2. Put the jmp opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // The prerequisite is to compute the relative offset of the
+ // __xray_FunctionExit function's address.
+ const uint64_t Address = Sled.address();
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+ (static_cast<int64_t>(Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionExit, reinterpret_cast<void *>(Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
+ *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the tail-call sled with a sequence
+ // similar to the entry sled, but one that calls the tail exit trampoline
+ // instead.
+ const uint64_t Address = Sled.address();
+ int64_t TrampolineOffset =
+ reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+ (static_cast<int64_t>(Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionTailExit, reinterpret_cast<void *>(Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // In Version 0:
+ //
+ // xray_sled_n:
+ // jmp +20 // 2 bytes
+ // ...
+ //
+ // With the following:
+ //
+ // nopw // 2 bytes*
+ // ...
+ //
+ //
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ //
+ // ---
+ //
+ // In Version 1 or 2:
+ //
+ // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+ // to a jmp, use 15 bytes instead.
+ //
+ const uint64_t Address = Sled.address();
+ if (Enable) {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
+ std::memory_order_release);
+ } else {
+ switch (Sled.Version) {
+ case 1:
+ case 2:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
+ std::memory_order_release);
+ break;
+ case 0:
+ default:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
+ std::memory_order_release);
+ break;
+ }
+ }
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +20 // 2 byte instruction
+ // ...
+ //
+ // With the following:
+ //
+ // nopw // 2 bytes
+ // ...
+ //
+ //
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ // The 20 byte sled stashes three argument registers, calls the trampoline,
+ // unstashes the registers and returns. If the arguments are already in
+ // the correct registers, the stashing and unstashing become equivalently
+ // sized nops.
+ const uint64_t Address = Sled.address();
+ if (Enable) {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
+ std::memory_order_release);
+ }
+ return false;
+}
+
+#if !SANITIZER_FUCHSIA
+// We determine whether the CPU we're running on has the features we need.
+// On x86_64 this is rdtscp support.
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
+ unsigned int EAX, EBX, ECX, EDX;
+
+ // We check whether rdtscp support is enabled. According to the x86_64
+ // manual, the CPUID leaf should be 0x80000001, and we should check bit 27
+ // of EDX. That's 0x8000000 (or 1u << 27).
+ __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
+ : "0"(0x80000001));
+ if (!(EDX & (1u << 27))) {
+ Report("Missing rdtscp support.\n");
+ return false;
+ }
+ // Also check whether we can determine the CPU frequency, since if we cannot,
+ // we should use the emulated TSC instead.
+ if (!getTSCFrequency()) {
+ Report("Unable to determine CPU frequency.\n");
+ return false;
+ }
+ return true;
+}
+#endif
+
+} // namespace __xray
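For reference, the same rdtscp probe can be written with the __get_cpuid helper from the cpuid.h header this file already includes; a hedged, equivalent sketch:

#include <cpuid.h>

// Returns true iff CPUID.80000001H:EDX[27] (rdtscp) is set. __get_cpuid
// itself returns 0 when the requested leaf is not supported.
static bool hasRdtscp() {
  unsigned int EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  if (!__get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX))
    return false;
  return (EDX & (1u << 27)) != 0;
}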
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc
new file mode 100644
index 000000000000..477900355cf4
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc
@@ -0,0 +1,33 @@
+//===-- xray_x86_64.inc -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <x86intrin.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ unsigned LongCPU;
+ unsigned long Rax, Rdx;
+ __asm__ __volatile__("rdtscp\n" : "=a"(Rax), "=d"(Rdx), "=c"(LongCPU) ::);
+ CPU = LongCPU;
+ return (Rdx << 32) + Rax;
+}
+
+uint64_t getTSCFrequency();
+
+bool probeRequiredCPUFeatures();
+
+} // namespace __xray
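A short usage sketch tying readTSC and getTSCFrequency together (timeSeconds is a hypothetical helper; it ignores CPU migration between the two reads):

#include <cstdint>

namespace __xray {
// Measure the wall-clock duration of Fn in seconds using the TSC. The CPU
// identifier output of readTSC is discarded here.
template <typename Func> double timeSeconds(Func &&Fn) {
  uint8_t CPU;
  uint64_t Start = readTSC(CPU);
  Fn();
  uint64_t End = readTSC(CPU);
  return static_cast<double>(End - Start) /
         static_cast<double>(getTSCFrequency());
}
} // namespace __xray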