Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/xray')
54 files changed, 9265 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt b/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt new file mode 100644 index 000000000000..963fff2d697e --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt @@ -0,0 +1,4 @@ +___start_xray_fn_idx +___start_xray_instr_map +___stop_xray_fn_idx +___stop_xray_instr_map diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp new file mode 100644 index 000000000000..00105d30b4db --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp @@ -0,0 +1,127 @@ +//===-- xray_AArch64.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of AArch64-specific routines (64-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { + PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]! + PO_LdrW0_12 = 0x18000060, // LDR W0, #12 + PO_LdrX16_12 = 0x58000070, // LDR X16, #12 + PO_BlrX16 = 0xD63F0200, // BLR X16 + PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16 + PO_B32 = 0x14000008 // B #32 +}; + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #32 + // 7 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // STP X0, X30, [SP, #-16]! ; PUSH {r0, lr} + // LDR W0, #12 ; W0 := function ID + // LDR X16,#12 ; X16 := address of the trampoline + // BLR X16 + // ;DATA: 32 bits of function ID + // ;DATA: lower 32 bits of the address of the trampoline + // ;DATA: higher 32 bits of the address of the trampoline + // LDP X0, X30, [SP], #16 ; POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. 
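+ //
+ // For illustration only (a hypothetical helper, not part of this file),
+ // the publish pattern in isolation: the sled body is written with plain
+ // stores while the first word still branches over it, and the single
+ // release-store of the first word is what makes the patched body
+ // reachable:
+ //
+ //   void publishSled(uint32_t *Sled, const uint32_t *Body, size_t Words,
+ //                    uint32_t FirstWord) {
+ //     for (size_t I = 0; I < Words; ++I)
+ //       Sled[1 + I] = Body[I]; // not yet observable by other threads
+ //     std::atomic_store_explicit(
+ //         reinterpret_cast<std::atomic<uint32_t> *>(Sled), FirstWord,
+ //         std::memory_order_release); // publish with one atomic store
+ //   }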
+ // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #32 + + uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address()); + uint32_t *CurAddress = FirstAddress + 1; + if (Enable) { + *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16); + CurAddress++; + *CurAddress = FuncId; + CurAddress++; + *reinterpret_cast<void (**)()>(CurAddress) = TracingHook; + CurAddress += 2; + *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16); + CurAddress++; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); + } + __clear_cache(reinterpret_cast<char *>(FirstAddress), + reinterpret_cast<char *>(CurAddress)); + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in aarch64? + return false; +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h new file mode 100644 index 000000000000..4b42c473261d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h @@ -0,0 +1,288 @@ +//===-- xray_allocator.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the allocator interface for an arena allocator, used primarily for +// the profiling runtime. 
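+// For example (a minimal usage sketch; ProfileRoot is a hypothetical type),
+// callers obtain page-backed storage straight from mmap rather than from
+// the sanitizer-internal allocator:
+//
+//   struct ProfileRoot { uint64_t CallCount; uint64_t CumulativeTSC; };
+//   ProfileRoot *Root = allocate<ProfileRoot>(); // page-rounded mapping
+//   if (Root != nullptr) {
+//     // ... anonymous mappings start zero-filled ...
+//     deallocate(Root); // unmaps the same page-rounded extent
+//   }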
+// +//===----------------------------------------------------------------------===// +#ifndef XRAY_ALLOCATOR_H +#define XRAY_ALLOCATOR_H + +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_mutex.h" +#if SANITIZER_FUCHSIA +#include <zircon/process.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#else +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_defs.h" +#include "xray_utils.h" +#include <cstddef> +#include <cstdint> +#include <sys/mman.h> + +namespace __xray { + +// We implement our own memory allocation routine which will bypass the +// internal allocator. This allows us to manage the memory directly, using +// mmap'ed memory to back the allocators. +template <class T> T *allocate() XRAY_NEVER_INSTRUMENT { + uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA + zx_handle_t Vmo; + zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo); + if (Status != ZX_OK) { + if (Verbosity()) + Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", + sizeof(T), _zx_status_get_string(Status)); + return nullptr; + } + uintptr_t B; + Status = + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, + Vmo, 0, sizeof(T), &B); + _zx_handle_close(Vmo); + if (Status != ZX_OK) { + if (Verbosity()) + Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", sizeof(T), + _zx_status_get_string(Status)); + return nullptr; + } + return reinterpret_cast<T *>(B); +#else + uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + int ErrNo = 0; + if (UNLIKELY(internal_iserror(B, &ErrNo))) { + if (Verbosity()) + Report( + "XRay Profiling: Failed to allocate memory of size %d; Error = %d.\n", + RoundedSize, B); + return nullptr; + } +#endif + return reinterpret_cast<T *>(B); +} + +template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT { + if (B == nullptr) + return; + uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA + _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B), + RoundedSize); +#else + internal_munmap(B, RoundedSize); +#endif +} + +template <class T = unsigned char> +T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT { + uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA + zx_handle_t Vmo; + zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo); + if (Status != ZX_OK) { + if (Verbosity()) + Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", S, + _zx_status_get_string(Status)); + return nullptr; + } + uintptr_t B; + Status = _zx_vmar_map(_zx_vmar_root_self(), + ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, Vmo, 0, S, &B); + _zx_handle_close(Vmo); + if (Status != ZX_OK) { + if (Verbosity()) + Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", S, + _zx_status_get_string(Status)); + return nullptr; + } +#else + uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + int ErrNo = 0; + if (UNLIKELY(internal_iserror(B, &ErrNo))) { + if (Verbosity()) + Report( + "XRay Profiling: Failed to allocate memory of size %d; Error = %d.\n", + RoundedSize, B); + return nullptr; + } +#endif + return reinterpret_cast<T *>(B); +} + +template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT { + if (B == nullptr) + return; + uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached()); +#if 
SANITIZER_FUCHSIA + _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B), + RoundedSize); +#else + internal_munmap(B, RoundedSize); +#endif +} + +template <class T, class... U> +T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT { + auto A = allocateBuffer<T>(N); + if (A != nullptr) + while (N > 0) + new (A + (--N)) T(std::forward<U>(Us)...); + return A; +} + +/// The Allocator type hands out fixed-sized chunks of memory that are +/// cache-line aligned and sized. This is useful for placement of +/// performance-sensitive data in memory that's frequently accessed. The +/// allocator also self-limits the peak memory usage to a dynamically defined +/// maximum. +/// +/// N is the lower-bound size of the block of memory to return from the +/// allocation function. N is used to compute the size of a block, which is +/// cache-line-size multiples worth of memory. We compute the size of a block by +/// determining how many cache lines worth of memory is required to subsume N. +/// +/// The Allocator instance will manage its own memory acquired through mmap. +/// This severely constrains the platforms on which this can be used to POSIX +/// systems where mmap semantics are well-defined. +/// +/// FIXME: Isolate the lower-level memory management to a different abstraction +/// that can be platform-specific. +template <size_t N> struct Allocator { + // The Allocator returns memory as Block instances. + struct Block { + /// Compute the minimum cache-line size multiple that is >= N. + static constexpr auto Size = nearest_boundary(N, kCacheLineSize); + void *Data; + }; + +private: + size_t MaxMemory{0}; + unsigned char *BackingStore = nullptr; + unsigned char *AlignedNextBlock = nullptr; + size_t AllocatedBlocks = 0; + bool Owned; + SpinMutex Mutex{}; + + void *Alloc() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Lock(&Mutex); + if (UNLIKELY(BackingStore == nullptr)) { + BackingStore = allocateBuffer(MaxMemory); + if (BackingStore == nullptr) { + if (Verbosity()) + Report("XRay Profiling: Failed to allocate memory for allocator.\n"); + return nullptr; + } + + AlignedNextBlock = BackingStore; + + // Ensure that NextBlock is aligned appropriately. + auto BackingStoreNum = reinterpret_cast<uintptr_t>(BackingStore); + auto AlignedNextBlockNum = nearest_boundary( + reinterpret_cast<uintptr_t>(AlignedNextBlock), kCacheLineSize); + if (diff(AlignedNextBlockNum, BackingStoreNum) > ptrdiff_t(MaxMemory)) { + deallocateBuffer(BackingStore, MaxMemory); + AlignedNextBlock = BackingStore = nullptr; + if (Verbosity()) + Report("XRay Profiling: Cannot obtain enough memory from " + "preallocated region.\n"); + return nullptr; + } + + AlignedNextBlock = reinterpret_cast<unsigned char *>(AlignedNextBlockNum); + + // Assert that AlignedNextBlock is cache-line aligned. + DCHECK_EQ(reinterpret_cast<uintptr_t>(AlignedNextBlock) % kCacheLineSize, + 0); + } + + if (((AllocatedBlocks + 1) * Block::Size) > MaxMemory) + return nullptr; + + // Align the pointer we'd like to return to an appropriate alignment, then + // advance the pointer from where to start allocations. 
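+    // (Block::Size is nearest_boundary(N, kCacheLineSize), i.e. N rounded
+    // up to a cache-line multiple: with 64-byte lines, N == 24 yields 64
+    // and N == 100 yields 128. An equivalent one-liner, shown only as an
+    // illustration:
+    //   constexpr size_t roundUp(size_t N, size_t M) {
+    //     return N + (N % M ? M - (N % M) : 0);
+    //   }
+    // so every block handed out here starts on its own cache line.)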
+ void *Result = AlignedNextBlock; + AlignedNextBlock = + reinterpret_cast<unsigned char *>(AlignedNextBlock) + Block::Size; + ++AllocatedBlocks; + return Result; + } + +public: + explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT + : MaxMemory(RoundUpTo(M, kCacheLineSize)), + BackingStore(nullptr), + AlignedNextBlock(nullptr), + AllocatedBlocks(0), + Owned(true), + Mutex() {} + + explicit Allocator(void *P, size_t M) XRAY_NEVER_INSTRUMENT + : MaxMemory(M), + BackingStore(reinterpret_cast<unsigned char *>(P)), + AlignedNextBlock(reinterpret_cast<unsigned char *>(P)), + AllocatedBlocks(0), + Owned(false), + Mutex() {} + + Allocator(const Allocator &) = delete; + Allocator &operator=(const Allocator &) = delete; + + Allocator(Allocator &&O) XRAY_NEVER_INSTRUMENT { + SpinMutexLock L0(&Mutex); + SpinMutexLock L1(&O.Mutex); + MaxMemory = O.MaxMemory; + O.MaxMemory = 0; + BackingStore = O.BackingStore; + O.BackingStore = nullptr; + AlignedNextBlock = O.AlignedNextBlock; + O.AlignedNextBlock = nullptr; + AllocatedBlocks = O.AllocatedBlocks; + O.AllocatedBlocks = 0; + Owned = O.Owned; + O.Owned = false; + } + + Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT { + SpinMutexLock L0(&Mutex); + SpinMutexLock L1(&O.Mutex); + MaxMemory = O.MaxMemory; + O.MaxMemory = 0; + if (BackingStore != nullptr) + deallocateBuffer(BackingStore, MaxMemory); + BackingStore = O.BackingStore; + O.BackingStore = nullptr; + AlignedNextBlock = O.AlignedNextBlock; + O.AlignedNextBlock = nullptr; + AllocatedBlocks = O.AllocatedBlocks; + O.AllocatedBlocks = 0; + Owned = O.Owned; + O.Owned = false; + return *this; + } + + Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; } + + ~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT { + if (Owned && BackingStore != nullptr) { + deallocateBuffer(BackingStore, MaxMemory); + } + } +}; + +} // namespace __xray + +#endif // XRAY_ALLOCATOR_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt b/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt new file mode 100644 index 000000000000..151ed703dd56 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt @@ -0,0 +1,6 @@ +# List of function matchers common to C/C++ applications that make sense to +# always instrument. You can use this as an argument to +# -fxray-always-instrument=<path> along with your project-specific lists. + +# Always instrument the main function. +fun:main diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp new file mode 100644 index 000000000000..e1818555906c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp @@ -0,0 +1,164 @@ +//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of ARM-specific routines (32-bit). 
+// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { + PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} + PO_BlxIp = 0xE12FFF3C, // BLX ip + PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} + PO_B20 = 0xEA000005 // B #20 +}; + +// 0xUUUUWXYZ -> 0x000W0XYZ +inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return (Value & 0xfff) | ((Value & 0xf000) << 4); +} + +// 0xWXYZUUUU -> 0x000W0XYZ +inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return getMovwMask(Value >> 16); +} + +// Writes the following instructions: +// MOVW R<regNo>, #<lower 16 bits of the |Value|> +// MOVT R<regNo>, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadReg(uint8_t regNo, uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + // This is a fatal error: we cannot just report it and continue execution. + assert(regNo <= 15 && "Register number must be 0 to 15."); + // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ + *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); + Address++; + // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ + *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); + return Address + 1; +} + +// Writes the following instructions: +// MOVW r0, #<lower 16 bits of the |Value|> +// MOVT r0, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadR0(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(0, Address, Value); +} + +// Writes the following instructions: +// MOVW ip, #<lower 16 bits of the |Value|> +// MOVT ip, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadIP(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(12, Address, Value); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #20 + // 6 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // PUSH {r0, lr} + // MOVW r0, #<lower 16 bits of function ID> + // MOVT r0, #<higher 16 bits of function ID> + // MOVW ip, #<lower 16 bits of address of TracingHook> + // MOVT ip, #<higher 16 bits of address of TracingHook> + // BLX ip + // POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. 
+ // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #20 + + uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address()); + uint32_t *CurAddress = FirstAddress + 1; + if (Enable) { + CurAddress = + write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId)); + CurAddress = + write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook)); + *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + CurAddress++; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); + } + __clear_cache(reinterpret_cast<char *>(FirstAddress), + reinterpret_cast<char *>(CurAddress)); + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in arm? + return false; +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp new file mode 100644 index 000000000000..e0a5e7bb29ee --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp @@ -0,0 +1,49 @@ +//===-- xray_basic_flags.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay Basic flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_basic_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +/// Use via basicFlags(). 
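+// As an illustration of the X-macro pattern used below, a single entry in
+// xray_basic_flags.inc such as
+//   XRAY_FLAG(int, max_stack_depth, 64, "...")
+// expands three ways: to `int max_stack_depth;` inside struct BasicFlags,
+// to `max_stack_depth = 64;` inside setDefaults(), and to
+// `RegisterFlag(P, "max_stack_depth", "...", &F->max_stack_depth);` inside
+// registerXRayBasicFlags().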
+BasicFlags xray_basic_flags_dont_use_directly; + +void BasicFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_basic_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayBasicFlags(FlagParser *P, + BasicFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_basic_flags.inc" +#undef XRAY_FLAG +} + +const char *useCompilerDefinedBasicFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_BASIC_OPTIONS + return SANITIZER_STRINGIFY(XRAY_BASIC_OPTIONS); +#else + return ""; +#endif +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h new file mode 100644 index 000000000000..2459effa8bae --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h @@ -0,0 +1,37 @@ +//===-- xray_basic_flags.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay Basic Mode runtime flags. +//===----------------------------------------------------------------------===// + +#ifndef XRAY_BASIC_FLAGS_H +#define XRAY_BASIC_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct BasicFlags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_basic_flags.inc" +#undef XRAY_FLAG + + void setDefaults(); +}; + +extern BasicFlags xray_basic_flags_dont_use_directly; +extern void registerXRayBasicFlags(FlagParser *P, BasicFlags *F); +const char *useCompilerDefinedBasicFlags(); +inline BasicFlags *basicFlags() { return &xray_basic_flags_dont_use_directly; } + +} // namespace __xray + +#endif // XRAY_BASIC_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc new file mode 100644 index 000000000000..fb38c540d356 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc @@ -0,0 +1,23 @@ +//===-- xray_basic_flags.inc ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// XRay runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!"
+#endif + +XRAY_FLAG(int, func_duration_threshold_us, 5, + "Basic logging will try to skip functions that execute for fewer " + "microseconds than this threshold.") +XRAY_FLAG(int, max_stack_depth, 64, + "Basic logging will keep track of at most this deep a call stack, " + "any more and the recordings will be dropped.") +XRAY_FLAG(int, thread_buffer_size, 1024, + "The number of entries to keep on a per-thread buffer.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp new file mode 100644 index 000000000000..6e8e93131451 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp @@ -0,0 +1,515 @@ +//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of a simple in-memory log of XRay events. This defines a +// logging function that's compatible with the XRay handler interface, and +// routines for exporting data to files. +// +//===----------------------------------------------------------------------===// + +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/stat.h> +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC +#include <sys/syscall.h> +#endif +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray/xray_records.h" +#include "xray_recursion_guard.h" +#include "xray_basic_flags.h" +#include "xray_basic_logging.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" +#include "xray_tsc.h" +#include "xray_utils.h" + +namespace __xray { + +static SpinMutex LogMutex; + +namespace { +// We use elements of this type to record the entry TSC of every function ID we +// see as we're tracing a particular thread's execution. 
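+// The layout below packs to exactly 16 bytes: 4 (FuncId) + 2 (Type) +
+// 1 (CPU) + 1 (Padding) + 8 (TSC). The static_assert that follows checks
+// this, and alignas(16) keeps entries naturally aligned in the shadow stack.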
+struct alignas(16) StackEntry { + int32_t FuncId; + uint16_t Type; + uint8_t CPU; + uint8_t Padding; + uint64_t TSC; +}; + +static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); + +struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { + void *InMemoryBuffer = nullptr; + size_t BufferSize = 0; + size_t BufferOffset = 0; + void *ShadowStack = nullptr; + size_t StackSize = 0; + size_t StackEntries = 0; + __xray::LogWriter *LogWriter = nullptr; +}; + +struct BasicLoggingOptions { + int DurationFilterMicros = 0; + size_t MaxStackDepth = 0; + size_t ThreadBufferSize = 0; +}; +} // namespace + +static pthread_key_t PThreadKey; + +static atomic_uint8_t BasicInitialized{0}; + +struct BasicLoggingOptions GlobalOptions; + +thread_local atomic_uint8_t Guard{0}; + +static atomic_uint8_t UseRealTSC{0}; +static atomic_uint64_t ThresholdTicks{0}; +static atomic_uint64_t TicksPerSec{0}; +static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; + +static LogWriter *getLog() XRAY_NEVER_INSTRUMENT { + LogWriter* LW = LogWriter::Open(); + if (LW == nullptr) + return LW; + + static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; + pthread_once(&DetectOnce, +[] { + if (atomic_load(&UseRealTSC, memory_order_acquire)) + atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); + }); + + // Since we're here, we get to write the header. We set it up so that the + // header will only be written once, at the start, and let the threads + // logging do writes which just append. + XRayFileHeader Header; + // Version 2 includes tail exit records. + // Version 3 includes pid inside records. + Header.Version = 3; + Header.Type = FileTypes::NAIVE_LOG; + Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); + + // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' + // before setting the values in the header. 
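+  // ('constant_tsc' means the TSC ticks at a fixed rate regardless of CPU
+  // frequency scaling; 'nonstop_tsc' means it keeps counting in deep sleep
+  // states. Both are needed for raw TSC deltas to be meaningful durations.)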
+ Header.ConstantTSC = 1; + Header.NonstopTSC = 1; + LW->WriteAll(reinterpret_cast<char *>(&Header), + reinterpret_cast<char *>(&Header) + sizeof(Header)); + return LW; +} + +static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT { + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static LogWriter *LW = nullptr; + pthread_once(&OnceInit, +[] { LW = getLog(); }); + return LW; +} + +static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { + thread_local ThreadLocalData TLD; + thread_local bool UNUSED TOnce = [] { + if (GlobalOptions.ThreadBufferSize == 0) { + if (Verbosity()) + Report("Not initializing TLD since ThreadBufferSize == 0.\n"); + return false; + } + pthread_setspecific(PThreadKey, &TLD); + TLD.LogWriter = getGlobalLog(); + TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>( + InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, + nullptr, alignof(XRayRecord))); + TLD.BufferSize = GlobalOptions.ThreadBufferSize; + TLD.BufferOffset = 0; + if (GlobalOptions.MaxStackDepth == 0) { + if (Verbosity()) + Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); + TLD.StackSize = 0; + TLD.StackEntries = 0; + TLD.ShadowStack = nullptr; + return false; + } + TLD.ShadowStack = reinterpret_cast<StackEntry *>( + InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr, + alignof(StackEntry))); + TLD.StackSize = GlobalOptions.MaxStackDepth; + TLD.StackEntries = 0; + return false; + }(); + return TLD; +} + +template <class RDTSC> +void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, + RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { + auto &TLD = getThreadLocalData(); + LogWriter *LW = getGlobalLog(); + if (LW == nullptr) + return; + + // Use a simple recursion guard, to handle cases where we're already logging + // and for one reason or another, this function gets called again in the same + // thread. + RecursionGuard G(Guard); + if (!G) + return; + + uint8_t CPU = 0; + uint64_t TSC = ReadTSC(CPU); + + switch (Type) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: { + // Short circuit if we've reached the maximum depth of the stack. + if (TLD.StackEntries++ >= TLD.StackSize) + return; + + // When we encounter an entry event, we keep track of the TSC and the CPU, + // and put it in the stack. + StackEntry E; + E.FuncId = FuncId; + E.CPU = CPU; + E.Type = Type; + E.TSC = TSC; + auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + + (sizeof(StackEntry) * (TLD.StackEntries - 1)); + internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); + break; + } + case XRayEntryType::EXIT: + case XRayEntryType::TAIL: { + if (TLD.StackEntries == 0) + break; + + if (--TLD.StackEntries >= TLD.StackSize) + return; + + // When we encounter an exit event, we check whether all the following are + // true: + // + // - The Function ID is the same as the most recent entry in the stack. + // - The CPU is the same as the most recent entry in the stack. + // - The Delta of the TSCs is less than the threshold amount of time we're + // looking to record. + // + // If all of these conditions are true, we pop the stack and don't write a + // record and move the record offset back. 
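+      // (The rewind below is one record for a plain ENTRY but two for a
+      // LOG_ARGS_ENTRY, since the latter wrote a function record plus an
+      // argument-payload record.)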
+ StackEntry StackTop; + auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + + (sizeof(StackEntry) * TLD.StackEntries); + internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); + if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && + StackTop.TSC < TSC) { + auto Delta = TSC - StackTop.TSC; + if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { + DCHECK(TLD.BufferOffset > 0); + TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; + return; + } + } + break; + } + default: + // Should be unreachable. + DCHECK(false && "Unsupported XRayEntryType encountered."); + break; + } + + // First determine whether the delta between the function's enter record and + // the exit record is higher than the threshold. + XRayRecord R; + R.RecordType = RecordTypes::NORMAL; + R.CPU = CPU; + R.TSC = TSC; + R.TId = GetTid(); + R.PId = internal_getpid(); + R.Type = Type; + R.FuncId = FuncId; + auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer); + internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); + if (++TLD.BufferOffset == TLD.BufferSize) { + SpinMutexLock Lock(&LogMutex); + LW->WriteAll(reinterpret_cast<char *>(FirstEntry), + reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; + } +} + +template <class RDTSC> +void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, + RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { + auto &TLD = getThreadLocalData(); + auto FirstEntry = + reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer); + const auto &BuffLen = TLD.BufferSize; + LogWriter *LW = getGlobalLog(); + if (LW == nullptr) + return; + + // First we check whether there's enough space to write the data consecutively + // in the thread-local buffer. If not, we first flush the buffer before + // attempting to write the two records that must be consecutive. + if (TLD.BufferOffset + 2 > BuffLen) { + SpinMutexLock Lock(&LogMutex); + LW->WriteAll(reinterpret_cast<char *>(FirstEntry), + reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; + } + + // Then we write the "we have an argument" record. + InMemoryRawLog(FuncId, Type, ReadTSC); + + RecursionGuard G(Guard); + if (!G) + return; + + // And, from here on write the arg payload. 
+ XRayArgPayload R; + R.RecordType = RecordTypes::ARG_PAYLOAD; + R.FuncId = FuncId; + R.TId = GetTid(); + R.PId = internal_getpid(); + R.Arg = Arg1; + internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); + if (++TLD.BufferOffset == BuffLen) { + SpinMutexLock Lock(&LogMutex); + LW->WriteAll(reinterpret_cast<char *>(FirstEntry), + reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; + } +} + +void basicLoggingHandleArg0RealTSC(int32_t FuncId, + XRayEntryType Type) XRAY_NEVER_INSTRUMENT { + InMemoryRawLog(FuncId, Type, readTSC); +} + +void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) + XRAY_NEVER_INSTRUMENT { + InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno)); + TS = {0, 0}; + } + CPU = 0; + return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; + }); +} + +void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC); +} + +void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + InMemoryRawLogWithArg( + FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno)); + TS = {0, 0}; + } + CPU = 0; + return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; + }); +} + +static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { + ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P); + auto ExitGuard = at_scope_exit([&TLD] { + // Clean up dynamic resources. + if (TLD.InMemoryBuffer) + InternalFree(TLD.InMemoryBuffer); + if (TLD.ShadowStack) + InternalFree(TLD.ShadowStack); + if (Verbosity()) + Report("Cleaned up log for TID: %d\n", GetTid()); + }); + + if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) { + if (Verbosity()) + Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(), + TLD.BufferOffset); + return; + } + + { + SpinMutexLock L(&LogMutex); + TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer), + reinterpret_cast<char *>(TLD.InMemoryBuffer) + + (sizeof(XRayRecord) * TLD.BufferOffset)); + } + + // Because this thread's exit could be the last one trying to write to + // the file, and we're not able to close out the file properly, we + // sync instead and hope that the pending writes are flushed as the + // thread exits. + TLD.LogWriter->Flush(); +} + +XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, + UNUSED size_t BufferMax, void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + uint8_t Expected = 0; + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, + memory_order_acq_rel)) { + if (Verbosity()) + Report("Basic logging already initialized.\n"); + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; + } + + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once(&OnceInit, +[] { + pthread_key_create(&PThreadKey, TLDDestructor); + atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); + // Initialize the global TicksPerSec value. + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ?
getTSCFrequency() + : NanosecondsPerSecond, + memory_order_release); + if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) + Report("WARNING: Required CPU features missing for XRay instrumentation, " + "using emulation instead.\n"); + }); + + FlagParser P; + BasicFlags F; + F.setDefaults(); + registerXRayBasicFlags(&P, &F); + P.ParseString(useCompilerDefinedBasicFlags()); + auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); + if (EnvOpts == nullptr) + EnvOpts = ""; + + P.ParseString(EnvOpts); + + // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options + // set through XRAY_OPTIONS instead. + if (internal_strlen(EnvOpts) == 0) { + F.func_duration_threshold_us = + flags()->xray_naive_log_func_duration_threshold_us; + F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; + F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; + } + + P.ParseString(static_cast<const char *>(Options)); + GlobalOptions.ThreadBufferSize = F.thread_buffer_size; + GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; + GlobalOptions.MaxStackDepth = F.max_stack_depth; + *basicFlags() = F; + + atomic_store(&ThresholdTicks, + atomic_load(&TicksPerSec, memory_order_acquire) * + GlobalOptions.DurationFilterMicros / 1000000, + memory_order_release); + __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) + ? basicLoggingHandleArg1RealTSC + : basicLoggingHandleArg1EmulateTSC); + __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) + ? basicLoggingHandleArg0RealTSC + : basicLoggingHandleArg0EmulateTSC); + + // TODO: Implement custom event and typed event handling support in Basic + // Mode. + __xray_remove_customevent_handler(); + __xray_remove_typedevent_handler(); + + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { + uint8_t Expected = 0; + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, + memory_order_acq_rel) && + Verbosity()) + Report("Basic logging already finalized.\n"); + + // Nothing really to do aside from marking state of the global to be + // uninitialized. + + return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { + // This really does nothing, since flushing the logs happen at the end of a + // thread's lifetime, or when the buffers are full. + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +// This is a handler that, effectively, does nothing. +void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { +} + +bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { + XRayLogImpl Impl{ + basicLoggingInit, + basicLoggingFinalize, + basicLoggingHandleArg0Empty, + basicLoggingFlush, + }; + auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); + if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && + Verbosity()) + Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", + RegistrationResult); + if (flags()->xray_naive_log || + !internal_strcmp(flags()->xray_mode, "xray-basic")) { + auto SelectResult = __xray_log_select_mode("xray-basic"); + if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { + if (Verbosity()) + Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult); + return false; + } + + // We initialize the implementation using the data we get from the + // XRAY_BASIC_OPTIONS environment variable, at this point of the + // implementation. 
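+    // For instance (an illustrative invocation; the values are arbitrary):
+    //   XRAY_BASIC_OPTIONS="func_duration_threshold_us=10:max_stack_depth=128" \
+    //   XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic" ./a.out
+    // selects this mode and overrides two of the defaults parsed here.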
+ auto *Env = GetEnv("XRAY_BASIC_OPTIONS"); + auto InitResult = + __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env); + if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { + if (Verbosity()) + Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); + return false; + } + + // At this point we know that we've successfully initialized Basic mode + // tracing, and the only chance we're going to get for the current thread to + // clean-up may be at thread/program exit. To ensure that we're going to get + // the cleanup even without calling the finalization routines, we're + // registering a program exit function that will do the cleanup. + static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; + pthread_once(&DynamicOnce, +[] { + static void *FakeTLD = nullptr; + FakeTLD = &getThreadLocalData(); + Atexit(+[] { TLDDestructor(FakeTLD); }); + }); + } + return true; +} + +} // namespace __xray + +static auto UNUSED Unused = __xray::basicLogDynamicInitializer(); diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h new file mode 100644 index 000000000000..89caca66b585 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h @@ -0,0 +1,42 @@ +//===-- xray_basic_logging.h ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_INMEMORY_LOG_H +#define XRAY_XRAY_INMEMORY_LOG_H + +#include "xray/xray_log_interface.h" + +/// Basic (Naive) Mode +/// ================== +/// +/// This implementation hooks in through the XRay logging implementation +/// framework. The Basic Mode implementation will keep appending to a file as +/// soon as the thread-local buffers are full. It keeps minimal in-memory state +/// and does the minimum filtering required to keep log files smaller. + +namespace __xray { + +XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, + void *Options, size_t OptionsSize); +XRayLogInitStatus basicLoggingFinalize(); + +void basicLoggingHandleArg0RealTSC(int32_t FuncId, XRayEntryType Entry); +void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Entry); +void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Entry, + uint64_t Arg1); +void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Entry, + uint64_t Arg1); +XRayLogFlushStatus basicLoggingFlush(); +XRayLogInitStatus basicLoggingReset(); + +} // namespace __xray + +#endif // XRAY_XRAY_INMEMORY_LOG_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp new file mode 100644 index 000000000000..bad91e036cef --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp @@ -0,0 +1,237 @@ +//===-- xray_buffer_queue.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#include "xray_buffer_queue.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_libc.h" +#if !SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_allocator.h" +#include "xray_defs.h" +#include <memory> +#include <sys/mman.h> + +using namespace __xray; + +namespace { + +BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) { + auto B = + allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); + return B == nullptr ? nullptr + : reinterpret_cast<BufferQueue::ControlBlock *>(B); +} + +void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size, + size_t Count) { + deallocateBuffer(reinterpret_cast<unsigned char *>(C), + (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); +} + +void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) { + if (C == nullptr) + return; + if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1) + deallocControlBlock(C, Size, Count); +} + +void incRefCount(BufferQueue::ControlBlock *C) { + if (C == nullptr) + return; + atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel); +} + +// We use a struct to ensure that we are allocating one atomic_uint64_t per +// cache line. This allows us to not worry about false-sharing among atomic +// objects being updated (constantly) by different threads. +struct ExtentsPadded { + union { + atomic_uint64_t Extents; + unsigned char Storage[kCacheLineSize]; + }; +}; + +constexpr size_t kExtentsSize = sizeof(ExtentsPadded); + +} // namespace + +BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) { + SpinMutexLock Guard(&Mutex); + + if (!finalizing()) + return BufferQueue::ErrorCode::AlreadyInitialized; + + cleanupBuffers(); + + bool Success = false; + BufferSize = BS; + BufferCount = BC; + + BackingStore = allocControlBlock(BufferSize, BufferCount); + if (BackingStore == nullptr) + return BufferQueue::ErrorCode::NotEnoughMemory; + + auto CleanupBackingStore = at_scope_exit([&, this] { + if (Success) + return; + deallocControlBlock(BackingStore, BufferSize, BufferCount); + BackingStore = nullptr; + }); + + // Initialize enough atomic_uint64_t instances, one per buffer, each padded + // out to its own cache line to avoid false sharing. + ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount); + if (ExtentsBackingStore == nullptr) + return BufferQueue::ErrorCode::NotEnoughMemory; + + auto CleanupExtentsBackingStore = at_scope_exit([&, this] { + if (Success) + return; + deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount); + ExtentsBackingStore = nullptr; + }); + + Buffers = initArray<BufferRep>(BufferCount); + if (Buffers == nullptr) + return BufferQueue::ErrorCode::NotEnoughMemory; + + // At this point we increment the generation number to associate the buffers + // to the new generation. + atomic_fetch_add(&Generation, 1, memory_order_acq_rel); + + // First, we initialize the refcount in the ControlBlock, which we treat as + // being at the start of the BackingStore pointer.
+ atomic_store(&BackingStore->RefCount, 1, memory_order_release); + atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release); + + // Then we initialise the individual buffers that sub-divide the whole backing + // store. Each buffer will start at the `Data` member of the ControlBlock, and + // will be offsets from these locations. + for (size_t i = 0; i < BufferCount; ++i) { + auto &T = Buffers[i]; + auto &Buf = T.Buff; + auto *E = reinterpret_cast<ExtentsPadded *>(&ExtentsBackingStore->Data + + (kExtentsSize * i)); + Buf.Extents = &E->Extents; + atomic_store(Buf.Extents, 0, memory_order_release); + Buf.Generation = generation(); + Buf.Data = &BackingStore->Data + (BufferSize * i); + Buf.Size = BufferSize; + Buf.BackingStore = BackingStore; + Buf.ExtentsBackingStore = ExtentsBackingStore; + Buf.Count = BufferCount; + T.Used = false; + } + + Next = Buffers; + First = Buffers; + LiveBuffers = 0; + atomic_store(&Finalizing, 0, memory_order_release); + Success = true; + return BufferQueue::ErrorCode::Ok; +} + +BufferQueue::BufferQueue(size_t B, size_t N, + bool &Success) XRAY_NEVER_INSTRUMENT + : BufferSize(B), + BufferCount(N), + Mutex(), + Finalizing{1}, + BackingStore(nullptr), + ExtentsBackingStore(nullptr), + Buffers(nullptr), + Next(Buffers), + First(Buffers), + LiveBuffers(0), + Generation{0} { + Success = init(B, N) == BufferQueue::ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) { + if (atomic_load(&Finalizing, memory_order_acquire)) + return ErrorCode::QueueFinalizing; + + BufferRep *B = nullptr; + { + SpinMutexLock Guard(&Mutex); + if (LiveBuffers == BufferCount) + return ErrorCode::NotEnoughMemory; + B = Next++; + if (Next == (Buffers + BufferCount)) + Next = Buffers; + ++LiveBuffers; + } + + incRefCount(BackingStore); + incRefCount(ExtentsBackingStore); + Buf = B->Buff; + Buf.Generation = generation(); + B->Used = true; + return ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) { + // Check whether the buffer being referred to is within the bounds of the + // backing store's range. + BufferRep *B = nullptr; + { + SpinMutexLock Guard(&Mutex); + if (Buf.Generation != generation() || LiveBuffers == 0) { + Buf = {}; + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); + decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); + return BufferQueue::ErrorCode::Ok; + } + + if (Buf.Data < &BackingStore->Data || + Buf.Data > &BackingStore->Data + (BufferCount * BufferSize)) + return BufferQueue::ErrorCode::UnrecognizedBuffer; + + --LiveBuffers; + B = First++; + if (First == (Buffers + BufferCount)) + First = Buffers; + } + + // Now that the buffer has been released, we mark it as "used". 
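+  // ("Used" here means "has been written to and holds data worth reading
+  // back", not "currently checked out": apply() and the iterators declared
+  // in xray_buffer_queue.h only visit BufferReps with Used == true.)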
+ B->Buff = Buf; + B->Used = true; + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); + decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); + atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire), + memory_order_release); + Buf = {}; + return ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::finalize() { + if (atomic_exchange(&Finalizing, 1, memory_order_acq_rel)) + return ErrorCode::QueueFinalizing; + return ErrorCode::Ok; +} + +void BufferQueue::cleanupBuffers() { + for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) + B->~BufferRep(); + deallocateBuffer(Buffers, BufferCount); + decRefCount(BackingStore, BufferSize, BufferCount); + decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount); + BackingStore = nullptr; + ExtentsBackingStore = nullptr; + Buffers = nullptr; + BufferCount = 0; + BufferSize = 0; +} + +BufferQueue::~BufferQueue() { cleanupBuffers(); } diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h new file mode 100644 index 000000000000..e1739d050f3d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h @@ -0,0 +1,280 @@ +//===-- xray_buffer_queue.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_BUFFER_QUEUE_H +#define XRAY_BUFFER_QUEUE_H + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include "xray_defs.h" +#include <cstddef> +#include <cstdint> + +namespace __xray { + +/// BufferQueue implements a circular queue of fixed sized buffers (much like a +/// freelist) but is concerned with making it quick to initialise, finalise, and +/// get from or return buffers to the queue. This is one key component of the +/// "flight data recorder" (FDR) mode to support ongoing XRay function call +/// trace collection. +class BufferQueue { +public: + /// ControlBlock represents the memory layout of how we interpret the backing + /// store for all buffers and extents managed by a BufferQueue instance. The + /// ControlBlock has the reference count as the first member, sized according + /// to platform-specific cache-line size. We never use the Buffer member of + /// the union, which is only there for compiler-supported alignment and + /// sizing. + /// + /// This ensures that the `Data` member will be placed at least kCacheLineSize + /// bytes from the beginning of the structure. + struct ControlBlock { + union { + atomic_uint64_t RefCount; + char Buffer[kCacheLineSize]; + }; + + /// We need to make this size 1, to conform to the C++ rules for array data + /// members. Typically, we want to subtract this 1 byte for sizing + /// information. 
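+    /// For example, allocControlBlock() in xray_buffer_queue.cpp requests
+    /// (sizeof(ControlBlock) - 1) + (Size * Count) bytes: the -1 cancels the
+    /// single declared byte of Data, so Data behaves as a trailing array of
+    /// Size * Count bytes that begins one cache line into the mapping.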
+ char Data[1]; + }; + + struct Buffer { + atomic_uint64_t *Extents = nullptr; + uint64_t Generation{0}; + void *Data = nullptr; + size_t Size = 0; + + private: + friend class BufferQueue; + ControlBlock *BackingStore = nullptr; + ControlBlock *ExtentsBackingStore = nullptr; + size_t Count = 0; + }; + + struct BufferRep { + // The managed buffer. + Buffer Buff; + + // This is true if the buffer has been returned to the available queue, and + // is considered "used" by another thread. + bool Used = false; + }; + +private: + // This models a ForwardIterator. |T| Must be either a `Buffer` or `const + // Buffer`. Note that we only advance to the "used" buffers, when + // incrementing, so that at dereference we're always at a valid point. + template <class T> class Iterator { + public: + BufferRep *Buffers = nullptr; + size_t Offset = 0; + size_t Max = 0; + + Iterator &operator++() { + DCHECK_NE(Offset, Max); + do { + ++Offset; + } while (!Buffers[Offset].Used && Offset != Max); + return *this; + } + + Iterator operator++(int) { + Iterator C = *this; + ++(*this); + return C; + } + + T &operator*() const { return Buffers[Offset].Buff; } + + T *operator->() const { return &(Buffers[Offset].Buff); } + + Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT + : Buffers(Root), + Offset(O), + Max(M) { + // We want to advance to the first Offset where the 'Used' property is + // true, or to the end of the list/queue. + while (!Buffers[Offset].Used && Offset != Max) { + ++Offset; + } + } + + Iterator() = default; + Iterator(const Iterator &) = default; + Iterator(Iterator &&) = default; + Iterator &operator=(const Iterator &) = default; + Iterator &operator=(Iterator &&) = default; + ~Iterator() = default; + + template <class V> + friend bool operator==(const Iterator &L, const Iterator<V> &R) { + DCHECK_EQ(L.Max, R.Max); + return L.Buffers == R.Buffers && L.Offset == R.Offset; + } + + template <class V> + friend bool operator!=(const Iterator &L, const Iterator<V> &R) { + return !(L == R); + } + }; + + // Size of each individual Buffer. + size_t BufferSize; + + // Amount of pre-allocated buffers. + size_t BufferCount; + + SpinMutex Mutex; + atomic_uint8_t Finalizing; + + // The collocated ControlBlock and buffer storage. + ControlBlock *BackingStore; + + // The collocated ControlBlock and extents storage. + ControlBlock *ExtentsBackingStore; + + // A dynamically allocated array of BufferRep instances. + BufferRep *Buffers; + + // Pointer to the next buffer to be handed out. + BufferRep *Next; + + // Pointer to the entry in the array where the next released buffer will be + // placed. + BufferRep *First; + + // Count of buffers that have been handed out through 'getBuffer'. + size_t LiveBuffers; + + // We use a generation number to identify buffers and which generation they're + // associated with. + atomic_uint64_t Generation; + + /// Releases references to the buffers backed by the current buffer queue. 
+ void cleanupBuffers(); + +public: + enum class ErrorCode : unsigned { + Ok, + NotEnoughMemory, + QueueFinalizing, + UnrecognizedBuffer, + AlreadyFinalized, + AlreadyInitialized, + }; + + static const char *getErrorString(ErrorCode E) { + switch (E) { + case ErrorCode::Ok: + return "(none)"; + case ErrorCode::NotEnoughMemory: + return "no available buffers in the queue"; + case ErrorCode::QueueFinalizing: + return "queue already finalizing"; + case ErrorCode::UnrecognizedBuffer: + return "buffer being returned not owned by buffer queue"; + case ErrorCode::AlreadyFinalized: + return "queue already finalized"; + case ErrorCode::AlreadyInitialized: + return "queue already initialized"; + } + return "unknown error"; + } + + /// Initialise a queue of size |N| with buffers of size |B|. We report success + /// through |Success|. + BufferQueue(size_t B, size_t N, bool &Success); + + /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an + /// error in case there are no available buffers to return when we will run + /// over the upper bound for the total buffers. + /// + /// Requirements: + /// - BufferQueue is not finalising. + /// + /// Returns: + /// - ErrorCode::NotEnoughMemory on exceeding MaxSize. + /// - ErrorCode::Ok when we find a Buffer. + /// - ErrorCode::QueueFinalizing or ErrorCode::AlreadyFinalized on + /// a finalizing/finalized BufferQueue. + ErrorCode getBuffer(Buffer &Buf); + + /// Updates |Buf| to point to nullptr, with size 0. + /// + /// Returns: + /// - ErrorCode::Ok when we successfully release the buffer. + /// - ErrorCode::UnrecognizedBuffer for when this BufferQueue does not own + /// the buffer being released. + ErrorCode releaseBuffer(Buffer &Buf); + + /// Initializes the buffer queue, starting a new generation. We can re-set the + /// size of buffers with |BS| along with the buffer count with |BC|. + /// + /// Returns: + /// - ErrorCode::Ok when we successfully initialize the buffer. This + /// requires that the buffer queue is previously finalized. + /// - ErrorCode::AlreadyInitialized when the buffer queue is not finalized. + ErrorCode init(size_t BS, size_t BC); + + bool finalizing() const { + return atomic_load(&Finalizing, memory_order_acquire); + } + + uint64_t generation() const { + return atomic_load(&Generation, memory_order_acquire); + } + + /// Returns the configured size of the buffers in the buffer queue. + size_t ConfiguredBufferSize() const { return BufferSize; } + + /// Sets the state of the BufferQueue to finalizing, which ensures that: + /// + /// - All subsequent attempts to retrieve a Buffer will fail. + /// - All releaseBuffer operations will not fail. + /// + /// After a call to finalize succeeds, all subsequent calls to finalize will + /// fail with ErrorCode::QueueFinalizing. + ErrorCode finalize(); + + /// Applies the provided function F to each Buffer in the queue, only if the + /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a + /// releaseBuffer(...) operation). + template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT { + SpinMutexLock G(&Mutex); + for (auto I = begin(), E = end(); I != E; ++I) + Fn(*I); + } + + using const_iterator = Iterator<const Buffer>; + using iterator = Iterator<Buffer>; + + /// Provides iterator access to the raw Buffer instances. 
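+ ///
+ /// A minimal consumption sketch, where handleBuffer stands in for whatever
+ /// the caller does with the bytes (hypothetical, not an API defined here):
+ ///
+ /// BQ.apply([](const Buffer &B) {
+ /// auto Extents = atomic_load(B.Extents, memory_order_acquire);
+ /// handleBuffer(B.Data, Extents); // process Extents bytes at B.Data
+ /// });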
+ iterator begin() const { return iterator(Buffers, 0, BufferCount); } + const_iterator cbegin() const { + return const_iterator(Buffers, 0, BufferCount); + } + iterator end() const { return iterator(Buffers, BufferCount, BufferCount); } + const_iterator cend() const { + return const_iterator(Buffers, BufferCount, BufferCount); + } + + // Cleans up allocated buffers. + ~BufferQueue(); +}; + +} // namespace __xray + +#endif // XRAY_BUFFER_QUEUE_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h new file mode 100644 index 000000000000..2da03c3c3451 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h @@ -0,0 +1,31 @@ +//===-- xray_defs.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Common definitions useful for XRay sources. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_DEFS_H +#define XRAY_XRAY_DEFS_H + +#if XRAY_SUPPORTED +#define XRAY_NEVER_INSTRUMENT __attribute__((xray_never_instrument)) +#else +#define XRAY_NEVER_INSTRUMENT +#endif + +#if SANITIZER_NETBSD +// NetBSD: thread_local is not aligned properly, and the code relying +// on it segfaults +#define XRAY_TLS_ALIGNAS(x) +#define XRAY_HAS_TLS_ALIGNAS 0 +#else +#define XRAY_TLS_ALIGNAS(x) alignas(x) +#define XRAY_HAS_TLS_ALIGNAS 1 +#endif + +#endif // XRAY_XRAY_DEFS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h new file mode 100644 index 000000000000..28a3546caa7b --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h @@ -0,0 +1,372 @@ +//===-- xray_fdr_controller.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. 
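+//
+// FDRController drives record writing for FDR mode: it obtains buffers from a
+// BufferQueue, emits metadata and function records through an FDRLogWriter,
+// and rewinds function enter/exit pairs that execute under the configured
+// duration threshold.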
+// +//===----------------------------------------------------------------------===// +#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_ +#define COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_ + +#include <limits> +#include <time.h> + +#include "xray/xray_interface.h" +#include "xray/xray_records.h" +#include "xray_buffer_queue.h" +#include "xray_fdr_log_writer.h" + +namespace __xray { + +template <size_t Version = 5> class FDRController { + BufferQueue *BQ; + BufferQueue::Buffer &B; + FDRLogWriter &W; + int (*WallClockReader)(clockid_t, struct timespec *) = 0; + uint64_t CycleThreshold = 0; + + uint64_t LastFunctionEntryTSC = 0; + uint64_t LatestTSC = 0; + uint16_t LatestCPU = 0; + tid_t TId = 0; + pid_t PId = 0; + bool First = true; + + uint32_t UndoableFunctionEnters = 0; + uint32_t UndoableTailExits = 0; + + bool finalized() const XRAY_NEVER_INSTRUMENT { + return BQ == nullptr || BQ->finalizing(); + } + + bool hasSpace(size_t S) XRAY_NEVER_INSTRUMENT { + return B.Data != nullptr && B.Generation == BQ->generation() && + W.getNextRecord() + S <= reinterpret_cast<char *>(B.Data) + B.Size; + } + + constexpr int32_t mask(int32_t FuncId) const XRAY_NEVER_INSTRUMENT { + return FuncId & ((1 << 29) - 1); + } + + bool getNewBuffer() XRAY_NEVER_INSTRUMENT { + if (BQ->getBuffer(B) != BufferQueue::ErrorCode::Ok) + return false; + + W.resetRecord(); + DCHECK_EQ(W.getNextRecord(), B.Data); + LatestTSC = 0; + LatestCPU = 0; + First = true; + UndoableFunctionEnters = 0; + UndoableTailExits = 0; + atomic_store(B.Extents, 0, memory_order_release); + return true; + } + + bool setupNewBuffer() XRAY_NEVER_INSTRUMENT { + if (finalized()) + return false; + + DCHECK(hasSpace(sizeof(MetadataRecord) * 3)); + TId = GetTid(); + PId = internal_getpid(); + struct timespec TS { + 0, 0 + }; + WallClockReader(CLOCK_MONOTONIC, &TS); + + MetadataRecord Metadata[] = { + // Write out a MetadataRecord to signify that this is the start of a new + // buffer, associated with a particular thread, with a new CPU. For the + // data, we have 15 bytes to squeeze as much information as we can. At + // this point we only write down the following bytes: + // - Thread ID (tid_t, cast to 4 bytes type due to Darwin being 8 + // bytes) + createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>( + static_cast<int32_t>(TId)), + + // Also write the WalltimeMarker record. We only really need microsecond + // precision here, and enforce across platforms that we need 64-bit + // seconds and 32-bit microseconds encoded in the Metadata record. + createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>( + static_cast<int64_t>(TS.tv_sec), + static_cast<int32_t>(TS.tv_nsec / 1000)), + + // Also write the Pid record. + createMetadataRecord<MetadataRecord::RecordKinds::Pid>( + static_cast<int32_t>(PId)), + }; + + if (finalized()) + return false; + return W.writeMetadataRecords(Metadata); + } + + bool prepareBuffer(size_t S) XRAY_NEVER_INSTRUMENT { + if (finalized()) + return returnBuffer(); + + if (UNLIKELY(!hasSpace(S))) { + if (!returnBuffer()) + return false; + if (!getNewBuffer()) + return false; + if (!setupNewBuffer()) + return false; + } + + if (First) { + First = false; + W.resetRecord(); + atomic_store(B.Extents, 0, memory_order_release); + return setupNewBuffer(); + } + + return true; + } + + bool returnBuffer() XRAY_NEVER_INSTRUMENT { + if (BQ == nullptr) + return false; + + First = true; + if (finalized()) { + BQ->releaseBuffer(B); // ignore result. 
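+ // Deliberately report failure: once the queue is finalizing, callers must
+ // stop writing records, even though the buffer itself was handed back.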
+ return false; + } + + return BQ->releaseBuffer(B) == BufferQueue::ErrorCode::Ok; + } + + enum class PreambleResult { NoChange, WroteMetadata, InvalidBuffer }; + PreambleResult recordPreamble(uint64_t TSC, + uint16_t CPU) XRAY_NEVER_INSTRUMENT { + if (UNLIKELY(LatestCPU != CPU || LatestTSC == 0)) { + // We update our internal tracking state for the Latest TSC and CPU we've + // seen, then write out the appropriate metadata and function records. + LatestTSC = TSC; + LatestCPU = CPU; + + if (B.Generation != BQ->generation()) + return PreambleResult::InvalidBuffer; + + W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC); + return PreambleResult::WroteMetadata; + } + + DCHECK_EQ(LatestCPU, CPU); + + if (UNLIKELY(LatestTSC > TSC || + TSC - LatestTSC > + uint64_t{std::numeric_limits<int32_t>::max()})) { + // Either the TSC has wrapped around from the last TSC we've seen or the + // delta is too large to fit in a 32-bit signed integer, so we write a + // wrap-around record. + LatestTSC = TSC; + + if (B.Generation != BQ->generation()) + return PreambleResult::InvalidBuffer; + + W.writeMetadata<MetadataRecord::RecordKinds::TSCWrap>(TSC); + return PreambleResult::WroteMetadata; + } + + return PreambleResult::NoChange; + } + + bool rewindRecords(int32_t FuncId, uint64_t TSC, + uint16_t CPU) XRAY_NEVER_INSTRUMENT { + // Undo one enter record, because at this point we are either at the state + // of: + // - We are exiting a function that we recently entered. + // - We are exiting a function that was the result of a sequence of tail + // exits, and we can check whether the tail exits can be re-wound. + // + FunctionRecord F; + W.undoWrites(sizeof(FunctionRecord)); + if (B.Generation != BQ->generation()) + return false; + internal_memcpy(&F, W.getNextRecord(), sizeof(FunctionRecord)); + + DCHECK(F.RecordKind == + uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && + "Expected to find function entry recording when rewinding."); + DCHECK_EQ(F.FuncId, FuncId & ~(0x0F << 28)); + + LatestTSC -= F.TSCDelta; + if (--UndoableFunctionEnters != 0) { + LastFunctionEntryTSC -= F.TSCDelta; + return true; + } + + LastFunctionEntryTSC = 0; + auto RewindingTSC = LatestTSC; + auto RewindingRecordPtr = W.getNextRecord() - sizeof(FunctionRecord); + while (UndoableTailExits) { + if (B.Generation != BQ->generation()) + return false; + internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord)); + DCHECK_EQ(F.RecordKind, + uint8_t(FunctionRecord::RecordKinds::FunctionTailExit)); + RewindingTSC -= F.TSCDelta; + RewindingRecordPtr -= sizeof(FunctionRecord); + if (B.Generation != BQ->generation()) + return false; + internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord)); + + // This tail call exceeded the threshold duration. It will not be erased. 
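+ // (The comparison below is in TSC ticks; CycleThreshold is derived from
+ // func_duration_threshold_us when the controller is constructed.)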
+ if ((TSC - RewindingTSC) >= CycleThreshold) { + UndoableTailExits = 0; + return true; + } + + --UndoableTailExits; + W.undoWrites(sizeof(FunctionRecord) * 2); + LatestTSC = RewindingTSC; + } + return true; + } + +public: + template <class WallClockFunc> + FDRController(BufferQueue *BQ, BufferQueue::Buffer &B, FDRLogWriter &W, + WallClockFunc R, uint64_t C) XRAY_NEVER_INSTRUMENT + : BQ(BQ), + B(B), + W(W), + WallClockReader(R), + CycleThreshold(C) {} + + bool functionEnter(int32_t FuncId, uint64_t TSC, + uint16_t CPU) XRAY_NEVER_INSTRUMENT { + if (finalized() || + !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) + return returnBuffer(); + + auto PreambleStatus = recordPreamble(TSC, CPU); + if (PreambleStatus == PreambleResult::InvalidBuffer) + return returnBuffer(); + + if (PreambleStatus == PreambleResult::WroteMetadata) { + UndoableFunctionEnters = 1; + UndoableTailExits = 0; + } else { + ++UndoableFunctionEnters; + } + + auto Delta = TSC - LatestTSC; + LastFunctionEntryTSC = TSC; + LatestTSC = TSC; + return W.writeFunction(FDRLogWriter::FunctionRecordKind::Enter, + mask(FuncId), Delta); + } + + bool functionTailExit(int32_t FuncId, uint64_t TSC, + uint16_t CPU) XRAY_NEVER_INSTRUMENT { + if (finalized()) + return returnBuffer(); + + if (!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) + return returnBuffer(); + + auto PreambleStatus = recordPreamble(TSC, CPU); + if (PreambleStatus == PreambleResult::InvalidBuffer) + return returnBuffer(); + + if (PreambleStatus == PreambleResult::NoChange && + UndoableFunctionEnters != 0 && + TSC - LastFunctionEntryTSC < CycleThreshold) + return rewindRecords(FuncId, TSC, CPU); + + UndoableTailExits = UndoableFunctionEnters ? UndoableTailExits + 1 : 0; + UndoableFunctionEnters = 0; + auto Delta = TSC - LatestTSC; + LatestTSC = TSC; + return W.writeFunction(FDRLogWriter::FunctionRecordKind::TailExit, + mask(FuncId), Delta); + } + + bool functionEnterArg(int32_t FuncId, uint64_t TSC, uint16_t CPU, + uint64_t Arg) XRAY_NEVER_INSTRUMENT { + if (finalized() || + !prepareBuffer((2 * sizeof(MetadataRecord)) + sizeof(FunctionRecord)) || + recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) + return returnBuffer(); + + auto Delta = TSC - LatestTSC; + LatestTSC = TSC; + LastFunctionEntryTSC = 0; + UndoableFunctionEnters = 0; + UndoableTailExits = 0; + + return W.writeFunctionWithArg(FDRLogWriter::FunctionRecordKind::EnterArg, + mask(FuncId), Delta, Arg); + } + + bool functionExit(int32_t FuncId, uint64_t TSC, + uint16_t CPU) XRAY_NEVER_INSTRUMENT { + if (finalized() || + !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) + return returnBuffer(); + + auto PreambleStatus = recordPreamble(TSC, CPU); + if (PreambleStatus == PreambleResult::InvalidBuffer) + return returnBuffer(); + + if (PreambleStatus == PreambleResult::NoChange && + UndoableFunctionEnters != 0 && + TSC - LastFunctionEntryTSC < CycleThreshold) + return rewindRecords(FuncId, TSC, CPU); + + auto Delta = TSC - LatestTSC; + LatestTSC = TSC; + UndoableFunctionEnters = 0; + UndoableTailExits = 0; + return W.writeFunction(FDRLogWriter::FunctionRecordKind::Exit, mask(FuncId), + Delta); + } + + bool customEvent(uint64_t TSC, uint16_t CPU, const void *Event, + int32_t EventSize) XRAY_NEVER_INSTRUMENT { + if (finalized() || + !prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) || + recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) + return returnBuffer(); + + auto Delta = TSC - LatestTSC; + LatestTSC = TSC; + UndoableFunctionEnters = 0; + 
UndoableTailExits = 0; + return W.writeCustomEvent(Delta, Event, EventSize); + } + + bool typedEvent(uint64_t TSC, uint16_t CPU, uint16_t EventType, + const void *Event, int32_t EventSize) XRAY_NEVER_INSTRUMENT { + if (finalized() || + !prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) || + recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) + return returnBuffer(); + + auto Delta = TSC - LatestTSC; + LatestTSC = TSC; + UndoableFunctionEnters = 0; + UndoableTailExits = 0; + return W.writeTypedEvent(Delta, EventType, Event, EventSize); + } + + bool flush() XRAY_NEVER_INSTRUMENT { + if (finalized()) { + returnBuffer(); // ignore result. + return true; + } + return returnBuffer(); + } +}; + +} // namespace __xray + +#endif // COMPILER-RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_ diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp new file mode 100644 index 000000000000..272b0b7cb1f7 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp @@ -0,0 +1,47 @@ +//===-- xray_fdr_flags.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay FDR flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_fdr_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +FDRFlags xray_fdr_flags_dont_use_directly; // use via fdrFlags(). + +void FDRFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayFDRFlags(FlagParser *P, FDRFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG +} + +const char *useCompilerDefinedFDRFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_FDR_OPTIONS + return SANITIZER_STRINGIFY(XRAY_FDR_OPTIONS); +#else + return ""; +#endif +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h new file mode 100644 index 000000000000..d6f00dc48006 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h @@ -0,0 +1,37 @@ +//===-- xray_fdr_flags.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This file defines the flags for the flight-data-recorder mode implementation. 
+// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FDR_FLAGS_H +#define XRAY_FDR_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct FDRFlags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG + + void setDefaults(); +}; + +extern FDRFlags xray_fdr_flags_dont_use_directly; +extern void registerXRayFDRFlags(FlagParser *P, FDRFlags *F); +const char *useCompilerDefinedFDRFlags(); +inline FDRFlags *fdrFlags() { return &xray_fdr_flags_dont_use_directly; } + +} // namespace __xray + +#endif // XRAY_FDR_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc new file mode 100644 index 000000000000..6082b7e78521 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc @@ -0,0 +1,28 @@ +//===-- xray_fdr_flags.inc --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// XRay FDR Mode runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!" +#endif + +// FDR (Flight Data Recorder) Mode logging options. +XRAY_FLAG(int, func_duration_threshold_us, 5, + "FDR logging will try to skip functions that execute for fewer " + "microseconds than this threshold.") +XRAY_FLAG(int, grace_period_ms, 100, + "FDR logging will wait this much time in milliseconds before " + "actually flushing the log; this gives a chance for threads to " + "notice that the log has been finalized and clean up.") +XRAY_FLAG(int, buffer_size, 16384, + "Size of buffers in the circular buffer queue.") +XRAY_FLAG(int, buffer_max, 100, "Maximum number of buffers in the queue.") +XRAY_FLAG(bool, no_file_flush, false, + "Set to true to not write log files by default.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h new file mode 100644 index 000000000000..7a5d438314af --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h @@ -0,0 +1,75 @@ +//===-- xray_fdr_log_records.h -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H +#define XRAY_XRAY_FDR_LOG_RECORDS_H +#include <cstdint> + +namespace __xray { + +enum class RecordType : uint8_t { Function, Metadata }; + +// A MetadataRecord encodes the kind of record in its first byte, and have 15 +// additional bytes in the end to hold free-form data. +struct alignas(16) MetadataRecord { + // A MetadataRecord must always have a type of 1. 
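+ // On the little-endian targets XRay supports, this bit-field layout places
+ // Type in the least significant bit of the first byte, with RecordKind in
+ // the remaining seven bits; trace readers depend on that encoding.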
+ /* RecordType */ uint8_t Type : 1; + + // Each kind of record is represented as a 7-bit value (even though we use an + // unsigned 8-bit enum class to do so). + enum class RecordKinds : uint8_t { + NewBuffer, + EndOfBuffer, + NewCPUId, + TSCWrap, + WalltimeMarker, + CustomEventMarker, + CallArgument, + BufferExtents, + TypedEventMarker, + Pid, + }; + + // Use 7 bits to identify this record type. + /* RecordKinds */ uint8_t RecordKind : 7; + char Data[15]; +} __attribute__((packed)); + +static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord."); + +struct alignas(8) FunctionRecord { + // A FunctionRecord must always have a type of 0. + /* RecordType */ uint8_t Type : 1; + enum class RecordKinds { + FunctionEnter = 0x00, + FunctionExit = 0x01, + FunctionTailExit = 0x02, + }; + /* RecordKinds */ uint8_t RecordKind : 3; + + // We only use 28 bits of the function ID, so that we can use as few bytes as + // possible. This means we only support 2^28 (268,435,456) unique function ids + // in a single binary. + int FuncId : 28; + + // We use another 4 bytes to hold the delta between the previous entry's TSC. + // In case we've found that the distance is greater than the allowable 32 bits + // (either because we are running in a different CPU and the TSC might be + // different then), we should use a MetadataRecord before this FunctionRecord + // that will contain the full TSC for that CPU, and keep this to 0. + uint32_t TSCDelta; +} __attribute__((packed)); + +static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord."); + +} // namespace __xray + +#endif // XRAY_XRAY_FDR_LOG_RECORDS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h new file mode 100644 index 000000000000..0378663c3907 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h @@ -0,0 +1,231 @@ +//===-- xray_fdr_log_writer.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. 
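+//
+// FDRLogWriter serializes metadata and function records into a buffer drawn
+// from a BufferQueue, keeping the buffer's atomic extents in step with the
+// committed writes.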
+// +//===----------------------------------------------------------------------===// +#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_ +#define COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_ + +#include "xray_buffer_queue.h" +#include "xray_fdr_log_records.h" +#include <functional> +#include <tuple> +#include <type_traits> +#include <utility> + +namespace __xray { + +template <size_t Index> struct SerializerImpl { + template <class Tuple, + typename std::enable_if< + Index<std::tuple_size< + typename std::remove_reference<Tuple>::type>::value, + int>::type = 0> static void serializeTo(char *Buffer, + Tuple &&T) { + auto P = reinterpret_cast<const char *>(&std::get<Index>(T)); + constexpr auto Size = sizeof(std::get<Index>(T)); + internal_memcpy(Buffer, P, Size); + SerializerImpl<Index + 1>::serializeTo(Buffer + Size, + std::forward<Tuple>(T)); + } + + template <class Tuple, + typename std::enable_if< + Index >= std::tuple_size<typename std::remove_reference< + Tuple>::type>::value, + int>::type = 0> + static void serializeTo(char *, Tuple &&) {} +}; + +using Serializer = SerializerImpl<0>; + +template <class Tuple, size_t Index> struct AggregateSizesImpl { + static constexpr size_t value = + sizeof(typename std::tuple_element<Index, Tuple>::type) + + AggregateSizesImpl<Tuple, Index - 1>::value; +}; + +template <class Tuple> struct AggregateSizesImpl<Tuple, 0> { + static constexpr size_t value = + sizeof(typename std::tuple_element<0, Tuple>::type); +}; + +template <class Tuple> struct AggregateSizes { + static constexpr size_t value = + AggregateSizesImpl<Tuple, std::tuple_size<Tuple>::value - 1>::value; +}; + +template <MetadataRecord::RecordKinds Kind, class... DataTypes> +MetadataRecord createMetadataRecord(DataTypes &&... Ds) { + static_assert(AggregateSizes<std::tuple<DataTypes...>>::value <= + sizeof(MetadataRecord) - 1, + "Metadata payload longer than metadata buffer!"); + MetadataRecord R; + R.Type = 1; + R.RecordKind = static_cast<uint8_t>(Kind); + Serializer::serializeTo(R.Data, + std::make_tuple(std::forward<DataTypes>(Ds)...)); + return R; +} + +class FDRLogWriter { + BufferQueue::Buffer &Buffer; + char *NextRecord = nullptr; + + template <class T> void writeRecord(const T &R) { + internal_memcpy(NextRecord, reinterpret_cast<const char *>(&R), sizeof(T)); + NextRecord += sizeof(T); + // We need this atomic fence here to ensure that other threads attempting to + // read the bytes in the buffer will see the writes committed before the + // extents are updated. + atomic_thread_fence(memory_order_release); + atomic_fetch_add(Buffer.Extents, sizeof(T), memory_order_acq_rel); + } + +public: + explicit FDRLogWriter(BufferQueue::Buffer &B, char *P) + : Buffer(B), NextRecord(P) { + DCHECK_NE(Buffer.Data, nullptr); + DCHECK_NE(NextRecord, nullptr); + } + + explicit FDRLogWriter(BufferQueue::Buffer &B) + : FDRLogWriter(B, static_cast<char *>(B.Data)) {} + + template <MetadataRecord::RecordKinds Kind, class... Data> + bool writeMetadata(Data &&... Ds) { + // TODO: Check boundary conditions: + // 1) Buffer is full, and cannot handle one metadata record. + // 2) Buffer queue is finalising. 
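+ //
+ // Usage mirrors the call sites in FDRController, e.g. (a sketch):
+ //
+ // W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC);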
+ writeRecord(createMetadataRecord<Kind>(std::forward<Data>(Ds)...)); + return true; + } + + template <size_t N> size_t writeMetadataRecords(MetadataRecord (&Recs)[N]) { + constexpr auto Size = sizeof(MetadataRecord) * N; + internal_memcpy(NextRecord, reinterpret_cast<const char *>(Recs), Size); + NextRecord += Size; + // We need this atomic fence here to ensure that other threads attempting to + // read the bytes in the buffer will see the writes committed before the + // extents are updated. + atomic_thread_fence(memory_order_release); + atomic_fetch_add(Buffer.Extents, Size, memory_order_acq_rel); + return Size; + } + + enum class FunctionRecordKind : uint8_t { + Enter = 0x00, + Exit = 0x01, + TailExit = 0x02, + EnterArg = 0x03, + }; + + bool writeFunction(FunctionRecordKind Kind, int32_t FuncId, int32_t Delta) { + FunctionRecord R; + R.Type = 0; + R.RecordKind = uint8_t(Kind); + R.FuncId = FuncId; + R.TSCDelta = Delta; + writeRecord(R); + return true; + } + + bool writeFunctionWithArg(FunctionRecordKind Kind, int32_t FuncId, + int32_t Delta, uint64_t Arg) { + // We need to write the function with arg into the buffer, and then + // atomically update the buffer extents. This ensures that any reads + // synchronised on the buffer extents record will always see the writes + // that happen before the atomic update. + FunctionRecord R; + R.Type = 0; + R.RecordKind = uint8_t(Kind); + R.FuncId = FuncId; + R.TSCDelta = Delta; + MetadataRecord A = + createMetadataRecord<MetadataRecord::RecordKinds::CallArgument>(Arg); + NextRecord = reinterpret_cast<char *>(internal_memcpy( + NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) + + sizeof(R); + NextRecord = reinterpret_cast<char *>(internal_memcpy( + NextRecord, reinterpret_cast<char *>(&A), sizeof(A))) + + sizeof(A); + // We need this atomic fence here to ensure that other threads attempting to + // read the bytes in the buffer will see the writes committed before the + // extents are updated. + atomic_thread_fence(memory_order_release); + atomic_fetch_add(Buffer.Extents, sizeof(R) + sizeof(A), + memory_order_acq_rel); + return true; + } + + bool writeCustomEvent(int32_t Delta, const void *Event, int32_t EventSize) { + // We write the metadata record and the custom event data into the buffer + // first, before we atomically update the extents for the buffer. This + // allows us to ensure that any threads reading the extents of the buffer + // will only ever see the full metadata and custom event payload accounted + // (no partial writes accounted). + MetadataRecord R = + createMetadataRecord<MetadataRecord::RecordKinds::CustomEventMarker>( + EventSize, Delta); + NextRecord = reinterpret_cast<char *>(internal_memcpy( + NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) + + sizeof(R); + NextRecord = reinterpret_cast<char *>( + internal_memcpy(NextRecord, Event, EventSize)) + + EventSize; + + // We need this atomic fence here to ensure that other threads attempting to + // read the bytes in the buffer will see the writes committed before the + // extents are updated. + atomic_thread_fence(memory_order_release); + atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize, + memory_order_acq_rel); + return true; + } + + bool writeTypedEvent(int32_t Delta, uint16_t EventType, const void *Event, + int32_t EventSize) { + // We do something similar when writing out typed events, see + // writeCustomEvent(...) above for details. 
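+ // Concretely: one TypedEventMarker metadata record carrying the size, TSC
+ // delta, and event type, followed by EventSize bytes of raw payload.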
+ MetadataRecord R =
+ createMetadataRecord<MetadataRecord::RecordKinds::TypedEventMarker>(
+ EventSize, Delta, EventType);
+ NextRecord = reinterpret_cast<char *>(internal_memcpy(
+ NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) +
+ sizeof(R);
+ NextRecord = reinterpret_cast<char *>(
+ internal_memcpy(NextRecord, Event, EventSize)) +
+ EventSize;
+
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
+ // Account for the marker record as well as the payload, mirroring
+ // writeCustomEvent(...) above.
+ atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize,
+ memory_order_acq_rel);
+ return true;
+ }
+
+ char *getNextRecord() const { return NextRecord; }
+
+ void resetRecord() {
+ NextRecord = reinterpret_cast<char *>(Buffer.Data);
+ atomic_store(Buffer.Extents, 0, memory_order_release);
+ }
+
+ void undoWrites(size_t B) {
+ DCHECK_GE(NextRecord - B, reinterpret_cast<char *>(Buffer.Data));
+ NextRecord -= B;
+ atomic_fetch_sub(Buffer.Extents, B, memory_order_acq_rel);
+ }
+
+}; // class FDRLogWriter
+
+} // namespace __xray
+
+#endif // COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp
new file mode 100644
index 000000000000..16ce483502f0
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp
@@ -0,0 +1,757 @@
+//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the Flight Data Recorder mode for XRay, where we use
+// compact structures to store records in memory as well as when writing out
+// the data to files.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_fdr_logging.h"
+#include <cassert>
+#include <errno.h>
+#include <limits>
+#include <memory>
+#include <pthread.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_records.h"
+#include "xray_allocator.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_controller.h"
+#include "xray_fdr_flags.h"
+#include "xray_fdr_log_writer.h"
+#include "xray_flags.h"
+#include "xray_recursion_guard.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+
+namespace __xray {
+
+static atomic_sint32_t LoggingStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+namespace {
+
+// Group together thread-local-data in a struct, then hide it behind a function
+// call so that it can be initialized on first use instead of as a global. We
+// force the alignment to 64-bytes for x86 cache line alignment, as this
+// structure is used in the hot path of implementation.
+struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { + BufferQueue::Buffer Buffer{}; + BufferQueue *BQ = nullptr; + + using LogWriterStorage = + typename std::aligned_storage<sizeof(FDRLogWriter), + alignof(FDRLogWriter)>::type; + + LogWriterStorage LWStorage; + FDRLogWriter *Writer = nullptr; + + using ControllerStorage = + typename std::aligned_storage<sizeof(FDRController<>), + alignof(FDRController<>)>::type; + ControllerStorage CStorage; + FDRController<> *Controller = nullptr; +}; + +} // namespace + +static_assert(std::is_trivially_destructible<ThreadLocalData>::value, + "ThreadLocalData must be trivially destructible"); + +// Use a global pthread key to identify thread-local data for logging. +static pthread_key_t Key; + +// Global BufferQueue. +static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage; +static BufferQueue *BQ = nullptr; + +// Global thresholds for function durations. +static atomic_uint64_t ThresholdTicks{0}; + +// Global for ticks per second. +static atomic_uint64_t TicksPerSec{0}; + +static atomic_sint32_t LogFlushStatus = { + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +// This function will initialize the thread-local data structure used by the FDR +// logging implementation and return a reference to it. The implementation +// details require a bit of care to maintain. +// +// First, some requirements on the implementation in general: +// +// - XRay handlers should not call any memory allocation routines that may +// delegate to an instrumented implementation. This means functions like +// malloc() and free() should not be called while instrumenting. +// +// - We would like to use some thread-local data initialized on first-use of +// the XRay instrumentation. These allow us to implement unsynchronized +// routines that access resources associated with the thread. +// +// The implementation here uses a few mechanisms that allow us to provide both +// the requirements listed above. We do this by: +// +// 1. Using a thread-local aligned storage buffer for representing the +// ThreadLocalData struct. This data will be uninitialized memory by +// design. +// +// 2. Not requiring a thread exit handler/implementation, keeping the +// thread-local as purely a collection of references/data that do not +// require cleanup. +// +// We're doing this to avoid using a `thread_local` object that has a +// non-trivial destructor, because the C++ runtime might call std::malloc(...) +// to register calls to destructors. Deadlocks may arise when, for example, an +// externally provided malloc implementation is XRay instrumented, and +// initializing the thread-locals involves calling into malloc. A malloc +// implementation that does global synchronization might be holding a lock for a +// critical section, calling a function that might be XRay instrumented (and +// thus in turn calling into malloc by virtue of registration of the +// thread_local's destructor). 
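+//
+// Reduced to a sketch (with T standing in for ThreadLocalData), the idiom
+// used below is:
+//
+// static pthread_key_t Key; // created once via pthread_key_create(...)
+// T &get() {
+// thread_local std::aligned_storage<sizeof(T), alignof(T)>::type Store;
+// if (pthread_getspecific(Key) == nullptr) {
+// new (&Store) T{}; // placement-new on first use
+// pthread_setspecific(Key, &Store); // cleanup via the key's destructor
+// }
+// return *reinterpret_cast<T *>(&Store);
+// }
+//
+// The thread_local object itself stays trivially destructible, so the C++
+// runtime never needs to register a destructor (and never calls malloc).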
+#if XRAY_HAS_TLS_ALIGNAS +static_assert(alignof(ThreadLocalData) >= 64, + "ThreadLocalData must be cache line aligned."); +#endif +static ThreadLocalData &getThreadLocalData() { + thread_local typename std::aligned_storage< + sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; + + if (pthread_getspecific(Key) == NULL) { + new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{}; + pthread_setspecific(Key, &TLDStorage); + } + + return *reinterpret_cast<ThreadLocalData *>(&TLDStorage); +} + +static XRayFileHeader &fdrCommonHeaderInfo() { + static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage; + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static bool TSCSupported = true; + static uint64_t CycleFrequency = NanosecondsPerSecond; + pthread_once( + &OnceInit, +[] { + XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); + // Version 2 of the log writes the extents of the buffer, instead of + // relying on an end-of-buffer record. + // Version 3 includes PID metadata record. + // Version 4 includes CPU data in the custom event records. + // Version 5 uses relative deltas for custom and typed event records, + // and removes the CPU data in custom event records (similar to how + // function records use deltas instead of full TSCs and rely on other + // metadata records for TSC wraparound and CPU migration). + H.Version = 5; + H.Type = FileTypes::FDR_LOG; + + // Test for required CPU features and cache the cycle frequency + TSCSupported = probeRequiredCPUFeatures(); + if (TSCSupported) + CycleFrequency = getTSCFrequency(); + H.CycleFrequency = CycleFrequency; + + // FIXME: Actually check whether we have 'constant_tsc' and + // 'nonstop_tsc' before setting the values in the header. + H.ConstantTSC = 1; + H.NonstopTSC = 1; + }); + return reinterpret_cast<XRayFileHeader &>(HStorage); +} + +// This is the iterator implementation, which knows how to handle FDR-mode +// specific buffers. This is used as an implementation of the iterator function +// needed by __xray_set_buffer_iterator(...). It maintains a global state of the +// buffer iteration for the currently installed FDR mode buffers. In particular: +// +// - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) +// then the iterator returns the header information. +// - If the argument represents the header information ({address of header +// info, size of the header info}) then it returns the first FDR buffer's +// address and extents. +// - It will keep returning the next buffer and extents as there are more +// buffers to process. When the input represents the last buffer, it will +// return the initial state to signal completion ({nullptr, 0}). +// +// See xray/xray_log_interface.h for more details on the requirements for the +// implementations of __xray_set_buffer_iterator(...) and +// __xray_log_process_buffers(...). +XRayBuffer fdrIterator(const XRayBuffer B) { + DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); + DCHECK(BQ->finalizing()); + + if (BQ == nullptr || !BQ->finalizing()) { + if (Verbosity()) + Report( + "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); + return {nullptr, 0}; + } + + // We use a global scratch-pad for the header information, which only gets + // initialized the first time this function is called. We'll update one part + // of this information with some relevant data (in particular the number of + // buffers to expect). 
+ static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage; + static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; + pthread_once( + &HeaderOnce, +[] { + reinterpret_cast<XRayFileHeader &>(HeaderStorage) = + fdrCommonHeaderInfo(); + }); + + // We use a convenience alias for code referring to Header from here on out. + auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage); + if (B.Data == nullptr && B.Size == 0) { + Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; + return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)}; + } + + static BufferQueue::const_iterator It{}; + static BufferQueue::const_iterator End{}; + static uint8_t *CurrentBuffer{nullptr}; + static size_t SerializedBufferSize = 0; + if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { + // From this point on, we provide raw access to the raw buffer we're getting + // from the BufferQueue. We're relying on the iterators from the current + // Buffer queue. + It = BQ->cbegin(); + End = BQ->cend(); + } + + if (CurrentBuffer != nullptr) { + deallocateBuffer(CurrentBuffer, SerializedBufferSize); + CurrentBuffer = nullptr; + } + + if (It == End) + return {nullptr, 0}; + + // Set up the current buffer to contain the extents like we would when writing + // out to disk. The difference here would be that we still write "empty" + // buffers, or at least go through the iterators faithfully to let the + // handlers see the empty buffers in the queue. + // + // We need this atomic fence here to ensure that writes happening to the + // buffer have been committed before we load the extents atomically. Because + // the buffer is not explicitly synchronised across threads, we rely on the + // fence ordering to ensure that writes we expect to have been completed + // before the fence are fully committed before we read the extents. + atomic_thread_fence(memory_order_acquire); + auto BufferSize = atomic_load(It->Extents, memory_order_acquire); + SerializedBufferSize = BufferSize + sizeof(MetadataRecord); + CurrentBuffer = allocateBuffer(SerializedBufferSize); + if (CurrentBuffer == nullptr) + return {nullptr, 0}; + + // Write out the extents as a Metadata Record into the CurrentBuffer. + MetadataRecord ExtentsRecord; + ExtentsRecord.Type = uint8_t(RecordType::Metadata); + ExtentsRecord.RecordKind = + uint8_t(MetadataRecord::RecordKinds::BufferExtents); + internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); + auto AfterExtents = + static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord, + sizeof(MetadataRecord))) + + sizeof(MetadataRecord); + internal_memcpy(AfterExtents, It->Data, BufferSize); + + XRayBuffer Result; + Result.Data = CurrentBuffer; + Result.Size = SerializedBufferSize; + ++It; + return Result; +} + +// Must finalize before flushing. 
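+//
+// From the application's side the expected sequence is roughly the following
+// sketch, using the public API from xray/xray_log_interface.h (the config
+// string here is illustrative):
+//
+// __xray_log_select_mode("xray-fdr");
+// __xray_log_init_mode("xray-fdr", "buffer_size=16384:buffer_max=10");
+// // ... run instrumented code ...
+// __xray_log_finalize(); // moves the implementation to FINALIZED
+// __xray_log_flushLog(); // lands in fdrLoggingFlush() below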
+XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { + if (atomic_load(&LoggingStatus, memory_order_acquire) != + XRayLogInitStatus::XRAY_LOG_FINALIZED) { + if (Verbosity()) + Report("Not flushing log, implementation is not finalized.\n"); + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + } + + s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result, + XRayLogFlushStatus::XRAY_LOG_FLUSHING, + memory_order_release)) { + if (Verbosity()) + Report("Not flushing log, implementation is still finalizing.\n"); + return static_cast<XRayLogFlushStatus>(Result); + } + + if (BQ == nullptr) { + if (Verbosity()) + Report("Cannot flush when global buffer queue is null.\n"); + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + } + + // We wait a number of milliseconds to allow threads to see that we've + // finalised before attempting to flush the log. + SleepForMillis(fdrFlags()->grace_period_ms); + + // At this point, we're going to uninstall the iterator implementation, before + // we decide to do anything further with the global buffer queue. + __xray_log_remove_buffer_iterator(); + + // Once flushed, we should set the global status of the logging implementation + // to "uninitialized" to allow for FDR-logging multiple runs. + auto ResetToUnitialized = at_scope_exit([] { + atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + }); + + auto CleanupBuffers = at_scope_exit([] { + auto &TLD = getThreadLocalData(); + if (TLD.Controller != nullptr) + TLD.Controller->flush(); + }); + + if (fdrFlags()->no_file_flush) { + if (Verbosity()) + Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); + + atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, + memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; + } + + // We write out the file in the following format: + // + // 1) We write down the XRay file header with version 1, type FDR_LOG. + // 2) Then we use the 'apply' member of the BufferQueue that's live, to + // ensure that at this point in time we write down the buffers that have + // been released (and marked "used") -- we dump the full buffer for now + // (fixed-sized) and let the tools reading the buffers deal with the data + // afterwards. + // + LogWriter *LW = LogWriter::Open(); + if (LW == nullptr) { + auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + atomic_store(&LogFlushStatus, Result, memory_order_release); + return Result; + } + + XRayFileHeader Header = fdrCommonHeaderInfo(); + Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; + LW->WriteAll(reinterpret_cast<char *>(&Header), + reinterpret_cast<char *>(&Header) + sizeof(Header)); + + // Release the current thread's buffer before we attempt to write out all the + // buffers. This ensures that in case we had only a single thread going, that + // we are able to capture the data nonetheless. + auto &TLD = getThreadLocalData(); + if (TLD.Controller != nullptr) + TLD.Controller->flush(); + + BQ->apply([&](const BufferQueue::Buffer &B) { + // Starting at version 2 of the FDR logging implementation, we only write + // the records identified by the extents of the buffer. We use the Extents + // from the Buffer and write that out as the first record in the buffer. We + // still use a Metadata record, but fill in the extents instead for the + // data. 
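+ // The flushed stream therefore interleaves a BufferExtents record ahead of
+ // each buffer's bytes, which is what version 2+ readers expect.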
+ MetadataRecord ExtentsRecord; + auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); + DCHECK(BufferExtents <= B.Size); + ExtentsRecord.Type = uint8_t(RecordType::Metadata); + ExtentsRecord.RecordKind = + uint8_t(MetadataRecord::RecordKinds::BufferExtents); + internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); + if (BufferExtents > 0) { + LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord), + reinterpret_cast<char *>(&ExtentsRecord) + + sizeof(MetadataRecord)); + LW->WriteAll(reinterpret_cast<char *>(B.Data), + reinterpret_cast<char *>(B.Data) + BufferExtents); + } + }); + + atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, + memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; + if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_FINALIZING, + memory_order_release)) { + if (Verbosity()) + Report("Cannot finalize log, implementation not initialized.\n"); + return static_cast<XRayLogInitStatus>(CurrentStatus); + } + + // Do special things to make the log finalize itself, and not allow any more + // operations to be performed until re-initialized. + if (BQ == nullptr) { + if (Verbosity()) + Report("Attempting to finalize an uninitialized global buffer!\n"); + } else { + BQ->finalize(); + } + + atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, + memory_order_release); + return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +struct TSCAndCPU { + uint64_t TSC = 0; + unsigned char CPU = 0; +}; + +static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { + // We want to get the TSC as early as possible, so that we can check whether + // we've seen this CPU before. We also do it before we load anything else, + // to allow for forward progress with the scheduling. + TSCAndCPU Result; + + // Test once for required CPU features + static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; + static bool TSCSupported = true; + pthread_once( + &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); + + if (TSCSupported) { + Result.TSC = __xray::readTSC(Result.CPU); + } else { + // FIXME: This code needs refactoring as it appears in multiple locations + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); + TS = {0, 0}; + } + Result.CPU = 0; + Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; + } + return Result; +} + +thread_local atomic_uint8_t Running{0}; + +static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { + // Check if we're finalizing, before proceeding. + { + auto Status = atomic_load(&LoggingStatus, memory_order_acquire); + if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || + Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { + if (TLD.Controller != nullptr) { + TLD.Controller->flush(); + TLD.Controller = nullptr; + } + return false; + } + } + + if (UNLIKELY(TLD.Controller == nullptr)) { + // Set up the TLD buffer queue. + if (UNLIKELY(BQ == nullptr)) + return false; + TLD.BQ = BQ; + + // Check that we have a valid buffer. + if (TLD.Buffer.Generation != BQ->generation() && + TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) + return false; + + // Set up a buffer, before setting up the log writer. Bail out on failure. 
+ if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) + return false; + + // Set up the Log Writer for this thread. + if (UNLIKELY(TLD.Writer == nullptr)) { + auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage); + new (LWStorage) FDRLogWriter(TLD.Buffer); + TLD.Writer = LWStorage; + } else { + TLD.Writer->resetRecord(); + } + + auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage); + new (CStorage) + FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, + atomic_load_relaxed(&ThresholdTicks)); + TLD.Controller = CStorage; + } + + DCHECK_NE(TLD.Controller, nullptr); + return true; +} + +void fdrLoggingHandleArg0(int32_t FuncId, + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + switch (Entry) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + TLD.Controller->functionEnter(FuncId, TSC, CPU); + return; + case XRayEntryType::EXIT: + TLD.Controller->functionExit(FuncId, TSC, CPU); + return; + case XRayEntryType::TAIL: + TLD.Controller->functionTailExit(FuncId, TSC, CPU); + return; + case XRayEntryType::CUSTOM_EVENT: + case XRayEntryType::TYPED_EVENT: + break; + } +} + +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, + uint64_t Arg) XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + switch (Entry) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); + return; + case XRayEntryType::EXIT: + TLD.Controller->functionExit(FuncId, TSC, CPU); + return; + case XRayEntryType::TAIL: + TLD.Controller->functionTailExit(FuncId, TSC, CPU); + return; + case XRayEntryType::CUSTOM_EVENT: + case XRayEntryType::TYPED_EVENT: + break; + } +} + +void fdrLoggingHandleCustomEvent(void *Event, + std::size_t EventSize) XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + // Complain when we ever get at least one custom event that's larger than what + // we can possibly support. + if (EventSize > + static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once( + &Once, +[] { + Report("Custom event size too large; truncating to %d.\n", + std::numeric_limits<int32_t>::max()); + }); + } + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + int32_t ReducedEventSize = static_cast<int32_t>(EventSize); + TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); +} + +void fdrLoggingHandleTypedEvent( + uint16_t EventType, const void *Event, + std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + // Complain when we ever get at least one typed event that's larger than what + // we can possibly support. 
+ if (EventSize > + static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once( + &Once, +[] { + Report("Typed event size too large; truncating to %d.\n", + std::numeric_limits<int32_t>::max()); + }); + } + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + int32_t ReducedEventSize = static_cast<int32_t>(EventSize); + TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize); +} + +XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + if (Options == nullptr) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_INITIALIZING, + memory_order_release)) { + if (Verbosity()) + Report("Cannot initialize already initialized implementation.\n"); + return static_cast<XRayLogInitStatus>(CurrentStatus); + } + + if (Verbosity()) + Report("Initializing FDR mode with options: %s\n", + static_cast<const char *>(Options)); + + // TODO: Factor out the flags specific to the FDR mode implementation. For + // now, use the global/single definition of the flags, since the FDR mode + // flags are already defined there. + FlagParser FDRParser; + FDRFlags FDRFlags; + registerXRayFDRFlags(&FDRParser, &FDRFlags); + FDRFlags.setDefaults(); + + // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided + // options until we migrate everyone to use the XRAY_FDR_OPTIONS + // compiler-provided options. + FDRParser.ParseString(useCompilerDefinedFlags()); + FDRParser.ParseString(useCompilerDefinedFDRFlags()); + auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); + if (EnvOpts == nullptr) + EnvOpts = ""; + FDRParser.ParseString(EnvOpts); + + // FIXME: Remove this when we fully remove the deprecated flags. + if (internal_strlen(EnvOpts) == 0) { + FDRFlags.func_duration_threshold_us = + flags()->xray_fdr_log_func_duration_threshold_us; + FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; + } + + // The provided options should always override the compiler-provided and + // environment-variable defined options. + FDRParser.ParseString(static_cast<const char *>(Options)); + *fdrFlags() = FDRFlags; + auto BufferSize = FDRFlags.buffer_size; + auto BufferMax = FDRFlags.buffer_max; + + if (BQ == nullptr) { + bool Success = false; + BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); + new (BQ) BufferQueue(BufferSize, BufferMax, Success); + if (!Success) { + Report("BufferQueue init failed.\n"); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + } else { + if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) { + if (Verbosity()) + Report("Failed to re-initialize global buffer queue. Init failed.\n"); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + } + + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once( + &OnceInit, +[] { + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ? 
getTSCFrequency()
+                                                   : __xray::NanosecondsPerSecond,
+                     memory_order_release);
+        pthread_key_create(
+            &Key, +[](void *TLDPtr) {
+              if (TLDPtr == nullptr)
+                return;
+              auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
+              if (TLD.BQ == nullptr)
+                return;
+              if (TLD.Buffer.Data == nullptr)
+                return;
+              auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
+              if (EC != BufferQueue::ErrorCode::Ok)
+                Report("At thread exit, failed to release buffer at %p; "
+                       "error=%s\n",
+                       TLD.Buffer.Data, BufferQueue::getErrorString(EC));
+            });
+      });
+
+  atomic_store(&ThresholdTicks,
+               atomic_load_relaxed(&TicksPerSec) *
+                   fdrFlags()->func_duration_threshold_us / 1000000,
+               memory_order_release);
+  // Arg1 handler should go in first to avoid concurrent code accidentally
+  // falling back to arg0 when it should have run arg1.
+  __xray_set_handler_arg1(fdrLoggingHandleArg1);
+  // Install the actual handleArg0 handler after initialising the buffers.
+  __xray_set_handler(fdrLoggingHandleArg0);
+  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
+  __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
+
+  // Install the buffer iterator implementation.
+  __xray_log_set_buffer_iterator(fdrIterator);
+
+  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+               memory_order_release);
+
+  if (Verbosity())
+    Report("XRay FDR init successful.\n");
+  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
+  XRayLogImpl Impl{
+      fdrLoggingInit,
+      fdrLoggingFinalize,
+      fdrLoggingHandleArg0,
+      fdrLoggingFlush,
+  };
+  auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
+  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+    if (Verbosity())
+      Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
+             RegistrationResult);
+    return false;
+  }
+
+  if (flags()->xray_fdr_log ||
+      !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
+    auto SelectResult = __xray_log_select_mode("xray-fdr");
+    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+      if (Verbosity())
+        Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
+               SelectResult);
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h
new file mode 100644
index 000000000000..6df0057c4965
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h
@@ -0,0 +1,38 @@
+//===-- xray_fdr_logging.h ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOGGING_H
+#define XRAY_XRAY_FDR_LOGGING_H
+
+#include "xray/xray_log_interface.h"
+#include "xray_fdr_log_records.h"
+
+// FDR (Flight Data Recorder) Mode
+// ===============================
+//
+// The XRay whitepaper describes a mode of operation for function call trace
+// logging that involves writing small records into an in-memory circular
+// buffer that then gets logged to disk on demand.
To do this efficiently and +// capture as much data as we can, we use smaller records compared to the +// default mode of always writing fixed-size records. + +namespace __xray { +XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, + void *Options, size_t OptionsSize); +XRayLogInitStatus fdrLoggingFinalize(); +void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1); +XRayLogFlushStatus fdrLoggingFlush(); +XRayLogInitStatus fdrLoggingReset(); + +} // namespace __xray + +#endif // XRAY_XRAY_FDR_LOGGING_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp new file mode 100644 index 000000000000..e4c6906dc443 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp @@ -0,0 +1,84 @@ +//===-- xray_flags.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +Flags xray_flags_dont_use_directly; // use via flags(). + +void Flags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +// This function, as defined with the help of a macro meant to be introduced at +// build time of the XRay runtime, passes in a statically defined list of +// options that control XRay. This means users/deployments can tweak the +// defaults that override the hard-coded defaults in the xray_flags.inc at +// compile-time using the XRAY_DEFAULT_OPTIONS macro. +const char *useCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_DEFAULT_OPTIONS + // Do the double-layered string conversion to prevent badly crafted strings + // provided through the XRAY_DEFAULT_OPTIONS from causing compilation issues + // (or changing the semantics of the implementation through the macro). This + // ensures that we convert whatever XRAY_DEFAULT_OPTIONS is defined as a + // string literal. + return SANITIZER_STRINGIFY(XRAY_DEFAULT_OPTIONS); +#else + return ""; +#endif +} + +void initializeFlags() XRAY_NEVER_INSTRUMENT { + SetCommonFlagsDefaults(); + auto *F = flags(); + F->setDefaults(); + + FlagParser XRayParser; + registerXRayFlags(&XRayParser, F); + RegisterCommonFlags(&XRayParser); + + // Use options defaulted at compile-time for the runtime. + const char *XRayCompileFlags = useCompilerDefinedFlags(); + XRayParser.ParseString(XRayCompileFlags); + + // Override from environment variables. 
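+  // For example, a run can combine several of the flags registered above,
+  // e.g. (flag names as in xray_flags.inc; the values are illustrative only):
+  //
+  //   XRAY_OPTIONS="patch_premain=true xray_mode=xray-fdr verbosity=1" ./a.out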
+  XRayParser.ParseStringFromEnv("XRAY_OPTIONS");
+
+  // Override from command line.
+  InitializeCommonFlags();
+
+  if (Verbosity())
+    ReportUnrecognizedFlags();
+
+  if (common_flags()->help) {
+    XRayParser.PrintFlagDescriptions();
+  }
+}
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h
new file mode 100644
index 000000000000..edb5a5119f86
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h
@@ -0,0 +1,39 @@
+//===-- xray_flags.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_FLAGS_H
+#define XRAY_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+struct Flags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+
+  void setDefaults();
+};
+
+extern Flags xray_flags_dont_use_directly;
+extern void registerXRayFlags(FlagParser *P, Flags *F);
+const char *useCompilerDefinedFlags();
+inline Flags *flags() { return &xray_flags_dont_use_directly; }
+
+void initializeFlags();
+
+} // namespace __xray
+
+#endif // XRAY_FLAGS_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc
new file mode 100644
index 000000000000..b7dc5a08f242
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc
@@ -0,0 +1,49 @@
+//===-- xray_flags.inc ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(bool, patch_premain, false,
+          "Whether to patch instrumentation points before main.")
+XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
+          "Filename base for the xray logfile.")
+XRAY_FLAG(const char *, xray_mode, "", "Mode to install by default.")
+XRAY_FLAG(uptr, xray_page_size_override, 0,
+          "Override the default page size for the system, in bytes. The size "
+          "should be a power-of-two.")
+
+// Basic (Naive) Mode logging options.
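+// All of the flags below are deprecated; the supported way to configure basic
+// mode is through its mode-specific environment variable, for example:
+//
+//   XRAY_BASIC_OPTIONS="func_duration_threshold_us=10 max_stack_depth=32"
+//
+// (Option names come from the descriptions below; the values here are
+// illustrative only.)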
+XRAY_FLAG(bool, xray_naive_log, false, + "DEPRECATED: Use xray_mode=xray-basic instead.") +XRAY_FLAG(int, xray_naive_log_func_duration_threshold_us, 5, + "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set " + "func_duration_threshold_us instead.") +XRAY_FLAG(int, xray_naive_log_max_stack_depth, 64, + "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set " + "max_stack_depth instead.") +XRAY_FLAG(int, xray_naive_log_thread_buffer_size, 1024, + "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set " + "thread_buffer_size instead.") + +// FDR (Flight Data Recorder) Mode logging options. +XRAY_FLAG(bool, xray_fdr_log, false, + "DEPRECATED: Use xray_mode=xray-fdr instead.") +XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5, + "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set " + "func_duration_threshold_us instead.") +XRAY_FLAG(int, xray_fdr_log_grace_period_us, 0, + "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set " + "grace_period_ms instead.") +XRAY_FLAG(int, xray_fdr_log_grace_period_ms, 100, + "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set " + "grace_period_ms instead.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h new file mode 100644 index 000000000000..b8c60583761b --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h @@ -0,0 +1,603 @@ +//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This file defines the interface for a function call trie. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FUNCTION_CALL_TRIE_H +#define XRAY_FUNCTION_CALL_TRIE_H + +#include "xray_buffer_queue.h" +#include "xray_defs.h" +#include "xray_profiling_flags.h" +#include "xray_segmented_array.h" +#include <limits> +#include <memory> // For placement new. +#include <utility> + +namespace __xray { + +/// A FunctionCallTrie represents the stack traces of XRay instrumented +/// functions that we've encountered, where a node corresponds to a function and +/// the path from the root to the node its stack trace. Each node in the trie +/// will contain some useful values, including: +/// +/// * The cumulative amount of time spent in this particular node/stack. +/// * The number of times this stack has appeared. +/// * A histogram of latencies for that particular node. +/// +/// Each node in the trie will also contain a list of callees, represented using +/// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function +/// ID of the callee, and a pointer to the node. +/// +/// If we visualise this data structure, we'll find the following potential +/// representation: +/// +/// [function id node] -> [callees] [cumulative time] +/// [call counter] [latency histogram] +/// +/// As an example, when we have a function in this pseudocode: +/// +/// func f(N) { +/// g() +/// h() +/// for i := 1..N { j() } +/// } +/// +/// We may end up with a trie of the following form: +/// +/// f -> [ g, h, j ] [...] 
[1] [...] +/// g -> [ ... ] [...] [1] [...] +/// h -> [ ... ] [...] [1] [...] +/// j -> [ ... ] [...] [N] [...] +/// +/// If for instance the function g() called j() like so: +/// +/// func g() { +/// for i := 1..10 { j() } +/// } +/// +/// We'll find the following updated trie: +/// +/// f -> [ g, h, j ] [...] [1] [...] +/// g -> [ j' ] [...] [1] [...] +/// h -> [ ... ] [...] [1] [...] +/// j -> [ ... ] [...] [N] [...] +/// j' -> [ ... ] [...] [10] [...] +/// +/// Note that we'll have a new node representing the path `f -> g -> j'` with +/// isolated data. This isolation gives us a means of representing the stack +/// traces as a path, as opposed to a key in a table. The alternative +/// implementation here would be to use a separate table for the path, and use +/// hashes of the path as an identifier to accumulate the information. We've +/// moved away from this approach as it takes a lot of time to compute the hash +/// every time we need to update a function's call information as we're handling +/// the entry and exit events. +/// +/// This approach allows us to maintain a shadow stack, which represents the +/// currently executing path, and on function exits quickly compute the amount +/// of time elapsed from the entry, then update the counters for the node +/// already represented in the trie. This necessitates an efficient +/// representation of the various data structures (the list of callees must be +/// cache-aware and efficient to look up, and the histogram must be compact and +/// quick to update) to enable us to keep the overheads of this implementation +/// to the minimum. +class FunctionCallTrie { +public: + struct Node; + + // We use a NodeIdPair type instead of a std::pair<...> to not rely on the + // standard library types in this header. + struct NodeIdPair { + Node *NodePtr; + int32_t FId; + }; + + using NodeIdPairArray = Array<NodeIdPair>; + using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType; + + // A Node in the FunctionCallTrie gives us a list of callees, the cumulative + // number of times this node actually appeared, the cumulative amount of time + // for this particular node including its children call times, and just the + // local time spent on this node. Each Node will have the ID of the XRay + // instrumented function that it is associated to. + struct Node { + Node *Parent; + NodeIdPairArray Callees; + uint64_t CallCount; + uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. + int32_t FId; + + // TODO: Include the compact histogram. + }; + +private: + struct ShadowStackEntry { + uint64_t EntryTSC; + Node *NodePtr; + uint16_t EntryCPU; + }; + + using NodeArray = Array<Node>; + using RootArray = Array<Node *>; + using ShadowStackArray = Array<ShadowStackEntry>; + +public: + // We collate the allocators we need into a single struct, as a convenience to + // allow us to initialize these as a group. + struct Allocators { + using NodeAllocatorType = NodeArray::AllocatorType; + using RootAllocatorType = RootArray::AllocatorType; + using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; + + // Use hosted aligned storage members to allow for trivial move and init. + // This also allows us to sidestep the potential-failing allocation issue. 
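+    // The pattern, repeated for each allocator below, is a sketch of:
+    //
+    //   typename std::aligned_storage<sizeof(T), alignof(T)>::type Storage;
+    //   new (&Storage) T(...);  // constructed only when explicitly set up
+    //   T *Ptr = reinterpret_cast<T *>(&Storage);
+    //
+    // so the raw bytes can be relocated with internal_memcpy() and
+    // construction can never fail with an allocation error at this layer.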
+ typename std::aligned_storage<sizeof(NodeAllocatorType), + alignof(NodeAllocatorType)>::type + NodeAllocatorStorage; + typename std::aligned_storage<sizeof(RootAllocatorType), + alignof(RootAllocatorType)>::type + RootAllocatorStorage; + typename std::aligned_storage<sizeof(ShadowStackAllocatorType), + alignof(ShadowStackAllocatorType)>::type + ShadowStackAllocatorStorage; + typename std::aligned_storage<sizeof(NodeIdPairAllocatorType), + alignof(NodeIdPairAllocatorType)>::type + NodeIdPairAllocatorStorage; + + NodeAllocatorType *NodeAllocator = nullptr; + RootAllocatorType *RootAllocator = nullptr; + ShadowStackAllocatorType *ShadowStackAllocator = nullptr; + NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; + + Allocators() = default; + Allocators(const Allocators &) = delete; + Allocators &operator=(const Allocators &) = delete; + + struct Buffers { + BufferQueue::Buffer NodeBuffer; + BufferQueue::Buffer RootsBuffer; + BufferQueue::Buffer ShadowStackBuffer; + BufferQueue::Buffer NodeIdPairBuffer; + }; + + explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT { + new (&NodeAllocatorStorage) + NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size); + NodeAllocator = + reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); + + new (&RootAllocatorStorage) + RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size); + RootAllocator = + reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); + + new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType( + B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size); + ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( + &ShadowStackAllocatorStorage); + + new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType( + B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size); + NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( + &NodeIdPairAllocatorStorage); + } + + explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT { + new (&NodeAllocatorStorage) NodeAllocatorType(Max); + NodeAllocator = + reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); + + new (&RootAllocatorStorage) RootAllocatorType(Max); + RootAllocator = + reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); + + new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max); + ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( + &ShadowStackAllocatorStorage); + + new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max); + NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( + &NodeIdPairAllocatorStorage); + } + + Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT { + // Here we rely on the safety of memcpy'ing contents of the storage + // members, and then pointing the source pointers to nullptr. 
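+      // This is only sound because the allocator types keep no pointers into
+      // their own storage bytes: relocating them with internal_memcpy()
+      // leaves their internal state intact, while nulling the source pointers
+      // keeps the moved-from destructor from destroying them a second time.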
+ internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage, + sizeof(NodeAllocatorType)); + internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage, + sizeof(RootAllocatorType)); + internal_memcpy(&ShadowStackAllocatorStorage, + &O.ShadowStackAllocatorStorage, + sizeof(ShadowStackAllocatorType)); + internal_memcpy(&NodeIdPairAllocatorStorage, + &O.NodeIdPairAllocatorStorage, + sizeof(NodeIdPairAllocatorType)); + + NodeAllocator = + reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); + RootAllocator = + reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); + ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( + &ShadowStackAllocatorStorage); + NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( + &NodeIdPairAllocatorStorage); + + O.NodeAllocator = nullptr; + O.RootAllocator = nullptr; + O.ShadowStackAllocator = nullptr; + O.NodeIdPairAllocator = nullptr; + } + + Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { + // When moving into an existing instance, we ensure that we clean up the + // current allocators. + if (NodeAllocator) + NodeAllocator->~NodeAllocatorType(); + if (O.NodeAllocator) { + new (&NodeAllocatorStorage) + NodeAllocatorType(std::move(*O.NodeAllocator)); + NodeAllocator = + reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); + O.NodeAllocator = nullptr; + } else { + NodeAllocator = nullptr; + } + + if (RootAllocator) + RootAllocator->~RootAllocatorType(); + if (O.RootAllocator) { + new (&RootAllocatorStorage) + RootAllocatorType(std::move(*O.RootAllocator)); + RootAllocator = + reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); + O.RootAllocator = nullptr; + } else { + RootAllocator = nullptr; + } + + if (ShadowStackAllocator) + ShadowStackAllocator->~ShadowStackAllocatorType(); + if (O.ShadowStackAllocator) { + new (&ShadowStackAllocatorStorage) + ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator)); + ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( + &ShadowStackAllocatorStorage); + O.ShadowStackAllocator = nullptr; + } else { + ShadowStackAllocator = nullptr; + } + + if (NodeIdPairAllocator) + NodeIdPairAllocator->~NodeIdPairAllocatorType(); + if (O.NodeIdPairAllocator) { + new (&NodeIdPairAllocatorStorage) + NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator)); + NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( + &NodeIdPairAllocatorStorage); + O.NodeIdPairAllocator = nullptr; + } else { + NodeIdPairAllocator = nullptr; + } + + return *this; + } + + ~Allocators() XRAY_NEVER_INSTRUMENT { + if (NodeAllocator != nullptr) + NodeAllocator->~NodeAllocatorType(); + if (RootAllocator != nullptr) + RootAllocator->~RootAllocatorType(); + if (ShadowStackAllocator != nullptr) + ShadowStackAllocator->~ShadowStackAllocatorType(); + if (NodeIdPairAllocator != nullptr) + NodeIdPairAllocator->~NodeIdPairAllocatorType(); + } + }; + + static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { + return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); + } + + static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { + Allocators A(Max); + return A; + } + + static Allocators + InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT { + Allocators A(Bufs); + return A; + } + +private: + NodeArray Nodes; + RootArray Roots; + ShadowStackArray ShadowStack; + NodeIdPairAllocatorType *NodeIdPairAllocator; + uint32_t OverflowedFunctions; + +public: + explicit FunctionCallTrie(const Allocators &A) 
XRAY_NEVER_INSTRUMENT
+      : Nodes(*A.NodeAllocator),
+        Roots(*A.RootAllocator),
+        ShadowStack(*A.ShadowStackAllocator),
+        NodeIdPairAllocator(A.NodeIdPairAllocator),
+        OverflowedFunctions(0) {}
+
+  FunctionCallTrie() = delete;
+  FunctionCallTrie(const FunctionCallTrie &) = delete;
+  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
+
+  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
+      : Nodes(std::move(O.Nodes)),
+        Roots(std::move(O.Roots)),
+        ShadowStack(std::move(O.ShadowStack)),
+        NodeIdPairAllocator(O.NodeIdPairAllocator),
+        OverflowedFunctions(O.OverflowedFunctions) {}
+
+  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
+    Nodes = std::move(O.Nodes);
+    Roots = std::move(O.Roots);
+    ShadowStack = std::move(O.ShadowStack);
+    NodeIdPairAllocator = O.NodeIdPairAllocator;
+    OverflowedFunctions = O.OverflowedFunctions;
+    return *this;
+  }
+
+  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
+
+  void enterFunction(const int32_t FId, uint64_t TSC,
+                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(FId, 0);
+
+    // If we've already overflowed the function call stack, do not bother
+    // attempting to record any more function entries.
+    if (UNLIKELY(OverflowedFunctions)) {
+      ++OverflowedFunctions;
+      return;
+    }
+
+    // If this is the first function we've encountered, we want to set up the
+    // node(s) and treat it as a root.
+    if (UNLIKELY(ShadowStack.empty())) {
+      auto *NewRoot = Nodes.AppendEmplace(
+          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
+      if (UNLIKELY(NewRoot == nullptr))
+        return;
+      if (Roots.AppendEmplace(NewRoot) == nullptr) {
+        Nodes.trim(1);
+        return;
+      }
+      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
+        Nodes.trim(1);
+        Roots.trim(1);
+        ++OverflowedFunctions;
+        return;
+      }
+      return;
+    }
+
+    // From this point on, we require that the stack is not empty.
+    DCHECK(!ShadowStack.empty());
+    auto TopNode = ShadowStack.back().NodePtr;
+    DCHECK_NE(TopNode, nullptr);
+
+    // If we've seen this callee before, we access that node and place it on
+    // the top of the stack.
+    auto *Callee = TopNode->Callees.find_element(
+        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
+    if (Callee != nullptr) {
+      CHECK_NE(Callee->NodePtr, nullptr);
+      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
+        ++OverflowedFunctions;
+      return;
+    }
+
+    // This means we've never seen this stack before; create a new node here.
+    auto *NewNode = Nodes.AppendEmplace(
+        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
+    if (UNLIKELY(NewNode == nullptr))
+      return;
+    DCHECK_NE(NewNode, nullptr);
+    TopNode->Callees.AppendEmplace(NewNode, FId);
+    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
+      ++OverflowedFunctions;
+    return;
+  }
+
+  void exitFunction(int32_t FId, uint64_t TSC,
+                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+    // If we're exiting functions that have "overflowed" or don't fit into the
+    // stack due to allocator constraints, we then decrement that count first.
+    if (OverflowedFunctions) {
+      --OverflowedFunctions;
+      return;
+    }
+
+    // When we exit a function, we look up the ShadowStack to see whether we've
+    // entered this function before. We do as little processing here as we can,
+    // since most of the hard work would have already been done at function
+    // entry.
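+    // For example, given entries for f -> g -> j on the shadow stack, an exit
+    // event for f pops and credits j and g as well: the loop below keeps
+    // trimming (and accumulating each node's local time) until it reaches the
+    // matching function id, which also covers exits we never saw explicitly,
+    // e.g. because of tail calls.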
+ uint64_t CumulativeTreeTime = 0; + + while (!ShadowStack.empty()) { + const auto &Top = ShadowStack.back(); + auto TopNode = Top.NodePtr; + DCHECK_NE(TopNode, nullptr); + + // We may encounter overflow on the TSC we're provided, which may end up + // being less than the TSC when we first entered the function. + // + // To get the accurate measurement of cycles, we need to check whether + // we've overflowed (TSC < Top.EntryTSC) and then account the difference + // between the entry TSC and the max for the TSC counter (max of uint64_t) + // then add the value of TSC. We can prove that the maximum delta we will + // get is at most the 64-bit unsigned value, since the difference between + // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max() + // - 1) + 1. + // + // NOTE: This assumes that TSCs are synchronised across CPUs. + // TODO: Count the number of times we've seen CPU migrations. + uint64_t LocalTime = + Top.EntryTSC > TSC + ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC + : TSC - Top.EntryTSC; + TopNode->CallCount++; + TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime; + CumulativeTreeTime += LocalTime; + ShadowStack.trim(1); + + // TODO: Update the histogram for the node. + if (TopNode->FId == FId) + break; + } + } + + const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; } + + // The deepCopyInto operation will update the provided FunctionCallTrie by + // re-creating the contents of this particular FunctionCallTrie in the other + // FunctionCallTrie. It will do this using a Depth First Traversal from the + // roots, and while doing so recreating the traversal in the provided + // FunctionCallTrie. + // + // This operation will *not* destroy the state in `O`, and thus may cause some + // duplicate entries in `O` if it is not empty. + // + // This function is *not* thread-safe, and may require external + // synchronisation of both "this" and |O|. + // + // This function must *not* be called with a non-empty FunctionCallTrie |O|. + void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { + DCHECK(O.getRoots().empty()); + + // We then push the root into a stack, to use as the parent marker for new + // nodes we push in as we're traversing depth-first down the call tree. + struct NodeAndParent { + FunctionCallTrie::Node *Node; + FunctionCallTrie::Node *NewNode; + }; + using Stack = Array<NodeAndParent>; + + typename Stack::AllocatorType StackAllocator( + profilingFlags()->stack_allocator_max); + Stack DFSStack(StackAllocator); + + for (const auto Root : getRoots()) { + // Add a node in O for this root. + auto NewRoot = O.Nodes.AppendEmplace( + nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount, + Root->CumulativeLocalTime, Root->FId); + + // Because we cannot allocate more memory we should bail out right away. + if (UNLIKELY(NewRoot == nullptr)) + return; + + if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr)) + return; + + // TODO: Figure out what to do if we fail to allocate any more stack + // space. Maybe warn or report once? 
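+      // From here the copy proceeds iteratively: pop a (source, copy) pair
+      // off DFSStack, clone each callee of the source into O, wire the clone
+      // into the copy's Callees list, and push the new pair so the callee's
+      // own subtree gets cloned in a later iteration.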
+ if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr) + return; + while (!DFSStack.empty()) { + NodeAndParent NP = DFSStack.back(); + DCHECK_NE(NP.Node, nullptr); + DCHECK_NE(NP.NewNode, nullptr); + DFSStack.trim(1); + for (const auto Callee : NP.Node->Callees) { + auto NewNode = O.Nodes.AppendEmplace( + NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator), + Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime, + Callee.FId); + if (UNLIKELY(NewNode == nullptr)) + return; + if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) == + nullptr)) + return; + if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) == + nullptr)) + return; + } + } + } + } + + // The mergeInto operation will update the provided FunctionCallTrie by + // traversing the current trie's roots and updating (i.e. merging) the data in + // the nodes with the data in the target's nodes. If the node doesn't exist in + // the provided trie, we add a new one in the right position, and inherit the + // data from the original (current) trie, along with all its callees. + // + // This function is *not* thread-safe, and may require external + // synchronisation of both "this" and |O|. + void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { + struct NodeAndTarget { + FunctionCallTrie::Node *OrigNode; + FunctionCallTrie::Node *TargetNode; + }; + using Stack = Array<NodeAndTarget>; + typename Stack::AllocatorType StackAllocator( + profilingFlags()->stack_allocator_max); + Stack DFSStack(StackAllocator); + + for (const auto Root : getRoots()) { + Node *TargetRoot = nullptr; + auto R = O.Roots.find_element( + [&](const Node *Node) { return Node->FId == Root->FId; }); + if (R == nullptr) { + TargetRoot = O.Nodes.AppendEmplace( + nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, + Root->FId); + if (UNLIKELY(TargetRoot == nullptr)) + return; + + O.Roots.Append(TargetRoot); + } else { + TargetRoot = *R; + } + + DFSStack.AppendEmplace(Root, TargetRoot); + while (!DFSStack.empty()) { + NodeAndTarget NT = DFSStack.back(); + DCHECK_NE(NT.OrigNode, nullptr); + DCHECK_NE(NT.TargetNode, nullptr); + DFSStack.trim(1); + // TODO: Update the histogram as well when we have it ready. + NT.TargetNode->CallCount += NT.OrigNode->CallCount; + NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime; + for (const auto Callee : NT.OrigNode->Callees) { + auto TargetCallee = NT.TargetNode->Callees.find_element( + [&](const FunctionCallTrie::NodeIdPair &C) { + return C.FId == Callee.FId; + }); + if (TargetCallee == nullptr) { + auto NewTargetNode = O.Nodes.AppendEmplace( + NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, + Callee.FId); + + if (UNLIKELY(NewTargetNode == nullptr)) + return; + + TargetCallee = + NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId); + } + DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr); + } + } + } + } +}; + +} // namespace __xray + +#endif // XRAY_FUNCTION_CALL_TRIE_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp new file mode 100644 index 000000000000..00ba5fe4a52b --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp @@ -0,0 +1,131 @@ +//===-- xray_init.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic.
+//===----------------------------------------------------------------------===//
+
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+extern "C" {
+void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
+
+#if SANITIZER_MAC
+// HACK: This is a temporary workaround to make XRay build on
+// Darwin, but it will probably not work at runtime.
+const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+using namespace __xray;
+
+// When set to 'true' this means the XRay runtime has been initialised. We use
+// the weak symbols defined above (__start_xray_instr_map and
+// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
+// for runtime patching/unpatching of instrumentation points.
+//
+// FIXME: Support DSO instrumentation maps too. The current solution only works
+// for statically linked executables.
+atomic_uint8_t XRayInitialized{0};
+
+// This should always be updated before XRayInitialized is updated.
+SpinMutex XRayInstrMapMutex;
+XRaySledMap XRayInstrMap;
+
+// Global flag to determine whether the flags have been initialized.
+atomic_uint8_t XRayFlagsInitialized{0};
+
+// A mutex to allow only one thread to initialize the XRay data structures.
+SpinMutex XRayInitMutex;
+
+// __xray_init() will do the actual loading of the current process' memory map
+// and then proceed to look for the .xray_instr_map section/segment.
+void __xray_init() XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayInitMutex);
+  // Short-circuit if we've already initialized XRay before.
+  if (atomic_load(&XRayInitialized, memory_order_acquire))
+    return;
+
+  // XRay is not compatible with PaX MPROTECT.
+  CheckMPROTECT();
+
+  if (!atomic_load(&XRayFlagsInitialized, memory_order_acquire)) {
+    initializeFlags();
+    atomic_store(&XRayFlagsInitialized, true, memory_order_release);
+  }
+
+  if (__start_xray_instr_map == nullptr) {
+    if (Verbosity())
+      Report("XRay instrumentation map missing. Not initializing XRay.\n");
+    return;
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    XRayInstrMap.Sleds = __start_xray_instr_map;
+    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+    if (__start_xray_fn_idx != nullptr) {
+      XRayInstrMap.SledsIndex = __start_xray_fn_idx;
+      XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
+    } else {
+      size_t CountFunctions = 0;
+      uint64_t LastFnAddr = 0;
+
+      for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) {
+        const auto &Sled = XRayInstrMap.Sleds[I];
+        const auto Function = Sled.function();
+        if (Function != LastFnAddr) {
+          CountFunctions++;
+          LastFnAddr = Function;
+        }
+      }
+
+      XRayInstrMap.Functions = CountFunctions;
+    }
+  }
+  atomic_store(&XRayInitialized, true, memory_order_release);
+
+#ifndef XRAY_NO_PREINIT
+  if (flags()->patch_premain)
+    __xray_patch();
+#endif
+}
+
+// FIXME: Make check-xray tests work on FreeBSD without
+// SANITIZER_CAN_USE_PREINIT_ARRAY.
+// See sanitizer_internal_defs.h where the macro is defined.
+// Calling unresolved PLT functions in .preinit_array can lead to deadlock on
+// FreeBSD but here it seems benign.
+#if !defined(XRAY_NO_PREINIT) && \
+    (SANITIZER_CAN_USE_PREINIT_ARRAY || SANITIZER_FREEBSD)
+// Only add the preinit array initialization if the sanitizers can use it.
+__attribute__((section(".preinit_array"),
+               used)) void (*__local_xray_preinit)(void) = __xray_init;
+#else
+// If we cannot use the .preinit_array section, we should instead use dynamic
+// initialisation.
+__attribute__((constructor(0)))
+static void __local_xray_dyninit() {
+  __xray_init();
+}
+#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp
new file mode 100644
index 000000000000..7669b9ab82be
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp
@@ -0,0 +1,518 @@
+//===-- xray_interface.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions.
+// +//===----------------------------------------------------------------------===// + +#include "xray_interface_internal.h" + +#include <cstdint> +#include <cstdio> +#include <errno.h> +#include <limits> +#include <string.h> +#include <sys/mman.h> + +#if SANITIZER_FUCHSIA +#include <zircon/process.h> +#include <zircon/sanitizer.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#endif + +#include "sanitizer_common/sanitizer_addrhashmap.h" +#include "sanitizer_common/sanitizer_common.h" + +#include "xray_defs.h" +#include "xray_flags.h" + +extern __sanitizer::SpinMutex XRayInstrMapMutex; +extern __sanitizer::atomic_uint8_t XRayInitialized; +extern __xray::XRaySledMap XRayInstrMap; + +namespace __xray { + +#if defined(__x86_64__) +static const int16_t cSledLength = 12; +#elif defined(__aarch64__) +static const int16_t cSledLength = 32; +#elif defined(__arm__) +static const int16_t cSledLength = 28; +#elif SANITIZER_MIPS32 +static const int16_t cSledLength = 48; +#elif SANITIZER_MIPS64 +static const int16_t cSledLength = 64; +#elif defined(__powerpc64__) +static const int16_t cSledLength = 8; +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + +// This is the function to call when we encounter the entry or exit sleds. +atomic_uintptr_t XRayPatchedFunction{0}; + +// This is the function to call from the arg1-enabled sleds/trampolines. +atomic_uintptr_t XRayArgLogger{0}; + +// This is the function to call when we encounter a custom event log call. +atomic_uintptr_t XRayPatchedCustomEvent{0}; + +// This is the function to call when we encounter a typed event log call. +atomic_uintptr_t XRayPatchedTypedEvent{0}; + +// This is the global status to determine whether we are currently +// patching/unpatching. +atomic_uint8_t XRayPatching{0}; + +struct TypeDescription { + uint32_t type_id; + std::size_t description_string_length; +}; + +using TypeDescriptorMapType = AddrHashMap<TypeDescription, 11>; +// An address map from immutable descriptors to type ids. +TypeDescriptorMapType TypeDescriptorAddressMap{}; + +atomic_uint32_t TypeEventDescriptorCounter{0}; + +// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will +// undo any successful mprotect(...) changes. This is used to make a page +// writeable and executable, and upon destruction if it was successful in +// doing so returns the page into a read-only and executable page. +// +// This is only used specifically for runtime-patching of the XRay +// instrumentation points. This assumes that the executable pages are +// originally read-and-execute only. 
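+// A typical use, matching the call sites in controlPatching() and
+// mprotectAndPatchFunction() below:
+//
+//   MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+//   if (Protector.MakeWriteable() == -1)
+//     return XRayPatchingStatus::FAILED;
+//   // ... patch the sleds ...
+//   // Protector's destructor restores the read-and-execute protection.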
+class MProtectHelper {
+  void *PageAlignedAddr;
+  std::size_t MProtectLen;
+  bool MustCleanup;
+
+public:
+  explicit MProtectHelper(void *PageAlignedAddr,
+                          std::size_t MProtectLen,
+                          std::size_t PageSize) XRAY_NEVER_INSTRUMENT
+      : PageAlignedAddr(PageAlignedAddr),
+        MProtectLen(MProtectLen),
+        MustCleanup(false) {
+#if SANITIZER_FUCHSIA
+    this->MProtectLen = RoundUpTo(MProtectLen, PageSize);
+#endif
+  }
+
+  int MakeWriteable() XRAY_NEVER_INSTRUMENT {
+#if SANITIZER_FUCHSIA
+    auto R = __sanitizer_change_code_protection(
+        reinterpret_cast<uintptr_t>(PageAlignedAddr), MProtectLen, true);
+    if (R != ZX_OK) {
+      Report("XRay: cannot change code protection: %s\n",
+             _zx_status_get_string(R));
+      return -1;
+    }
+    MustCleanup = true;
+    return 0;
+#else
+    auto R = mprotect(PageAlignedAddr, MProtectLen,
+                      PROT_READ | PROT_WRITE | PROT_EXEC);
+    if (R != -1)
+      MustCleanup = true;
+    return R;
+#endif
+  }
+
+  ~MProtectHelper() XRAY_NEVER_INSTRUMENT {
+    if (MustCleanup) {
+#if SANITIZER_FUCHSIA
+      auto R = __sanitizer_change_code_protection(
+          reinterpret_cast<uintptr_t>(PageAlignedAddr), MProtectLen, false);
+      if (R != ZX_OK) {
+        Report("XRay: cannot change code protection: %s\n",
+               _zx_status_get_string(R));
+      }
+#else
+      mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC);
+#endif
+    }
+  }
+};
+
+namespace {
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable,
+               int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  bool Success = false;
+  switch (Sled.Kind) {
+  case XRayEntryType::ENTRY:
+    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    break;
+  case XRayEntryType::EXIT:
+    Success = patchFunctionExit(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::TAIL:
+    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    break;
+  case XRayEntryType::CUSTOM_EVENT:
+    Success = patchCustomEvent(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::TYPED_EVENT:
+    Success = patchTypedEvent(Enable, FuncId, Sled);
+    break;
+  default:
+    Report("Unsupported sled kind '%d' @%p\n", int(Sled.Kind),
+           reinterpret_cast<void *>(Sled.address()));
+    return false;
+  }
+  return Success;
+}
+
+const XRayFunctionSledIndex
+findFunctionSleds(int32_t FuncId,
+                  const XRaySledMap &InstrMap) XRAY_NEVER_INSTRUMENT {
+  int32_t CurFn = 0;
+  uint64_t LastFnAddr = 0;
+  XRayFunctionSledIndex Index = {nullptr, nullptr};
+
+  for (std::size_t I = 0; I < InstrMap.Entries && CurFn <= FuncId; I++) {
+    const auto &Sled = InstrMap.Sleds[I];
+    const auto Function = Sled.function();
+    if (Function != LastFnAddr) {
+      CurFn++;
+      LastFnAddr = Function;
+    }
+
+    if (CurFn == FuncId) {
+      if (Index.Begin == nullptr)
+        Index.Begin = &Sled;
+      Index.End = &Sled;
+    }
+  }
+
+  Index.End += 1;
+
+  return Index;
+}
+
+XRayPatchingStatus patchFunction(int32_t FuncId,
+                                 bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(&XRayPatching, &NotPatching, true,
+                                      memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  // Next, we look for the function index.
+  XRaySledMap InstrMap;
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
+  }
+
+  // If we don't have an index, we can't patch individual functions.
+  if (InstrMap.Functions == 0) {
+    atomic_store(&XRayPatching, false, memory_order_release);
+    return XRayPatchingStatus::NOT_INITIALIZED;
+  }
+
+  // FuncId must be a positive number, less than the number of functions
+  // instrumented.
+  if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    atomic_store(&XRayPatching, false, memory_order_release);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  // Now we patch the sleds for this specific function.
+  auto SledRange = InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1]
+                                       : findFunctionSleds(FuncId, InstrMap);
+  auto *f = SledRange.Begin;
+  auto *e = SledRange.End;
+  bool SucceedOnce = false;
+  while (f != e)
+    SucceedOnce |= patchSled(*f++, Enable, FuncId);
+
+  atomic_store(&XRayPatching, false, memory_order_release);
+
+  if (!SucceedOnce) {
+    Report("Failed patching any sled for function '%d'.\n", FuncId);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  return XRayPatchingStatus::SUCCESS;
+}
+
+// controlPatching implements the common internals of the patching/unpatching
+// implementation. |Enable| defines whether we're enabling or disabling the
+// runtime XRay instrumentation.
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(&XRayPatching, &NotPatching, true,
+                                      memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  uint8_t PatchingSuccess = false;
+  auto XRayPatchingStatusResetter = at_scope_exit([&PatchingSuccess] {
+    if (!PatchingSuccess)
+      atomic_store(&XRayPatching, false, memory_order_release);
+  });
+
+  XRaySledMap InstrMap;
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
+  }
+  if (InstrMap.Entries == 0)
+    return XRayPatchingStatus::NOT_INITIALIZED;
+
+  uint32_t FuncId = 1;
+  uint64_t CurFun = 0;
+
+  // First we want to find the bounds for which we have instrumentation points,
+  // and try to get as few calls to mprotect(...) as possible. We're assuming
+  // that all the sleds for the instrumentation map are contiguous as a single
+  // set of pages. When we do support dynamic shared object instrumentation,
+  // we'll need to do this for each set of page load offsets per DSO loaded.
+  // For now we're assuming we can mprotect the whole section of text between
+  // the minimum sled address and the maximum sled address (+ the largest sled
+  // size).
+  auto *MinSled = &InstrMap.Sleds[0];
+  auto *MaxSled = &InstrMap.Sleds[InstrMap.Entries - 1];
+  for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+    const auto &Sled = InstrMap.Sleds[I];
+    if (Sled.address() < MinSled->address())
+      MinSled = &Sled;
+    if (Sled.address() > MaxSled->address())
+      MaxSled = &Sled;
+  }
+
+  const size_t PageSize = flags()->xray_page_size_override > 0
+                              ? flags()->xray_page_size_override
+                              : GetPageSizeCached();
+  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+    Report("System page size is not a power of two: %zu\n", PageSize);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  void *PageAlignedAddr =
+      reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1));
+  size_t MProtectLen =
+      (MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) +
+      cSledLength;
+  MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+  if (Protector.MakeWriteable() == -1) {
+    Report("Failed mprotect: %d\n", errno);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  for (std::size_t I = 0; I < InstrMap.Entries; ++I) {
+    auto &Sled = InstrMap.Sleds[I];
+    auto F = Sled.function();
+    if (CurFun == 0)
+      CurFun = F;
+    if (F != CurFun) {
+      ++FuncId;
+      CurFun = F;
+    }
+    patchSled(Sled, Enable, FuncId);
+  }
+  atomic_store(&XRayPatching, false, memory_order_release);
+  PatchingSuccess = true;
+  return XRayPatchingStatus::SUCCESS;
+}
+
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+                                            bool Enable) XRAY_NEVER_INSTRUMENT {
+  XRaySledMap InstrMap;
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
+  }
+
+  // FuncId must be a positive number, less than the number of functions
+  // instrumented.
+  if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  const size_t PageSize = flags()->xray_page_size_override > 0
+                              ? flags()->xray_page_size_override
+                              : GetPageSizeCached();
+  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+    Report("Provided page size is not a power of two: %zu\n", PageSize);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  // Here we compute the minimum sled and maximum sled associated with a
+  // particular function ID.
+  auto SledRange = InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1]
+                                       : findFunctionSleds(FuncId, InstrMap);
+  auto *f = SledRange.Begin;
+  auto *e = SledRange.End;
+  auto *MinSled = f;
+  auto *MaxSled = (SledRange.End - 1);
+  while (f != e) {
+    if (f->address() < MinSled->address())
+      MinSled = f;
+    if (f->address() > MaxSled->address())
+      MaxSled = f;
+    ++f;
+  }
+
+  void *PageAlignedAddr =
+      reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1));
+  size_t MProtectLen =
+      (MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) +
+      cSledLength;
+  MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+  if (Protector.MakeWriteable() == -1) {
+    Report("Failed mprotect: %d\n", errno);
+    return XRayPatchingStatus::FAILED;
+  }
+  return patchFunction(FuncId, Enable);
+}
+
+} // namespace
+
+} // namespace __xray
+
+using namespace __xray;
+
+// The following functions are declared `extern "C" {...}` in the header, hence
+// they're defined in the global namespace.
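+// A minimal tracing session, as a sketch of how these fit together (the
+// handler body is illustrative only):
+//
+//   __xray_set_handler(+[](int32_t FuncId, XRayEntryType Type) {
+//     // record FuncId/Type somewhere async-signal-safe
+//   });
+//   __xray_patch();     // rewrite the sleds to call the trampolines
+//   // ... run the instrumented workload ...
+//   __xray_unpatch();
+//   __xray_remove_handler();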
+ +int __xray_set_handler(void (*entry)(int32_t, + XRayEntryType)) XRAY_NEVER_INSTRUMENT { + if (atomic_load(&XRayInitialized, + memory_order_acquire)) { + + atomic_store(&__xray::XRayPatchedFunction, + reinterpret_cast<uintptr_t>(entry), + memory_order_release); + return 1; + } + return 0; +} + +int __xray_set_customevent_handler(void (*entry)(void *, size_t)) + XRAY_NEVER_INSTRUMENT { + if (atomic_load(&XRayInitialized, + memory_order_acquire)) { + atomic_store(&__xray::XRayPatchedCustomEvent, + reinterpret_cast<uintptr_t>(entry), + memory_order_release); + return 1; + } + return 0; +} + +int __xray_set_typedevent_handler(void (*entry)( + uint16_t, const void *, size_t)) XRAY_NEVER_INSTRUMENT { + if (atomic_load(&XRayInitialized, + memory_order_acquire)) { + atomic_store(&__xray::XRayPatchedTypedEvent, + reinterpret_cast<uintptr_t>(entry), + memory_order_release); + return 1; + } + return 0; +} + +int __xray_remove_handler() XRAY_NEVER_INSTRUMENT { + return __xray_set_handler(nullptr); +} + +int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT { + return __xray_set_customevent_handler(nullptr); +} + +int __xray_remove_typedevent_handler() XRAY_NEVER_INSTRUMENT { + return __xray_set_typedevent_handler(nullptr); +} + +uint16_t __xray_register_event_type( + const char *const event_type) XRAY_NEVER_INSTRUMENT { + TypeDescriptorMapType::Handle h(&TypeDescriptorAddressMap, (uptr)event_type); + if (h.created()) { + h->type_id = atomic_fetch_add( + &TypeEventDescriptorCounter, 1, memory_order_acq_rel); + h->description_string_length = strnlen(event_type, 1024); + } + return h->type_id; +} + +XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT { + return controlPatching(true); +} + +XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT { + return controlPatching(false); +} + +XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { + return mprotectAndPatchFunction(FuncId, true); +} + +XRayPatchingStatus +__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { + return mprotectAndPatchFunction(FuncId, false); +} + +int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) { + if (!atomic_load(&XRayInitialized, + memory_order_acquire)) + return 0; + + // A relaxed write might not be visible even if the current thread gets + // scheduled on a different CPU/NUMA node. We need to wait for everyone to + // have this handler installed for consistency of collected data across CPUs. + atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry), + memory_order_release); + return 1; +} + +int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); } + +uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { + XRaySledMap InstrMap; + { + SpinMutexLock Guard(&XRayInstrMapMutex); + InstrMap = XRayInstrMap; + } + + if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) + return 0; + const XRaySledEntry *Sled = InstrMap.SledsIndex + ? InstrMap.SledsIndex[FuncId - 1].Begin + : findFunctionSleds(FuncId, InstrMap).Begin; + return Sled->function() +// On PPC, function entries are always aligned to 16 bytes. The beginning of a +// sled might be a local entry, which is always +8 based on the global entry. +// Always return the global entry. 
+#ifdef __PPC__ + & ~0xf +#endif + ; +} + +size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayInstrMapMutex); + return XRayInstrMap.Functions; +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h new file mode 100644 index 000000000000..390f389b1dca --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h @@ -0,0 +1,104 @@ +//===-- xray_interface_internal.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of the API functions. See also include/xray/xray_interface.h. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_INTERFACE_INTERNAL_H +#define XRAY_INTERFACE_INTERNAL_H + +#include "sanitizer_common/sanitizer_platform.h" +#include "xray/xray_interface.h" +#include <cstddef> +#include <cstdint> + +extern "C" { + +struct XRaySledEntry { +#if SANITIZER_WORDSIZE == 64 + uint64_t Address; + uint64_t Function; + unsigned char Kind; + unsigned char AlwaysInstrument; + unsigned char Version; + unsigned char Padding[13]; // Need 32 bytes + uint64_t function() const { + if (Version < 2) + return Function; + // The target address is relative to the location of the Function variable. + return reinterpret_cast<uint64_t>(&Function) + Function; + } + uint64_t address() const { + if (Version < 2) + return Address; + // The target address is relative to the location of the Address variable. + return reinterpret_cast<uint64_t>(&Address) + Address; + } +#elif SANITIZER_WORDSIZE == 32 + uint32_t Address; + uint32_t Function; + unsigned char Kind; + unsigned char AlwaysInstrument; + unsigned char Version; + unsigned char Padding[5]; // Need 16 bytes + uint32_t function() const { + if (Version < 2) + return Function; + // The target address is relative to the location of the Function variable. + return reinterpret_cast<uint32_t>(&Function) + Function; + } + uint32_t address() const { + if (Version < 2) + return Address; + // The target address is relative to the location of the Address variable. + return reinterpret_cast<uint32_t>(&Address) + Address; + } +#else +#error "Unsupported word size." +#endif +}; + +struct XRayFunctionSledIndex { + const XRaySledEntry *Begin; + const XRaySledEntry *End; +}; +} + +namespace __xray { + +struct XRaySledMap { + const XRaySledEntry *Sleds; + size_t Entries; + const XRayFunctionSledIndex *SledsIndex; + size_t Functions; +}; + +bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled, + void (*Trampoline)()); +bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchFunctionTailExit(bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled); +bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); + +} // namespace __xray + +extern "C" { +// The following functions have to be defined in assembler, on a per-platform +// basis. See xray_trampoline_*.S files for implementations. 
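+// Broadly, each trampoline saves enough machine state to call the handler
+// installed via the __xray_set_*handler() functions with the function id that
+// patchSled() embedded into the sled, then restores state and resumes the
+// instrumented function. (Summary only; the per-architecture details live in
+// the assembly files.)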
+extern void __xray_FunctionEntry(); +extern void __xray_FunctionExit(); +extern void __xray_FunctionTailExit(); +extern void __xray_ArgLoggerEntry(); +extern void __xray_CustomEvent(); +extern void __xray_TypedEvent(); +} + +#endif diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp new file mode 100644 index 000000000000..fc70373f9dac --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp @@ -0,0 +1,209 @@ +//===-- xray_log_interface.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include "xray/xray_log_interface.h" + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include "xray/xray_interface.h" +#include "xray_defs.h" + +namespace __xray { +static SpinMutex XRayImplMutex; +static XRayLogImpl CurrentXRayImpl{nullptr, nullptr, nullptr, nullptr}; +static XRayLogImpl *GlobalXRayImpl = nullptr; + +// This is the default implementation of a buffer iterator, which always yields +// a null buffer. +XRayBuffer NullBufferIterator(XRayBuffer) XRAY_NEVER_INSTRUMENT { + return {nullptr, 0}; +} + +// This is the global function responsible for iterating through given buffers. +atomic_uintptr_t XRayBufferIterator{ + reinterpret_cast<uintptr_t>(&NullBufferIterator)}; + +// We use a linked list of Mode to XRayLogImpl mappings. This is a linked list +// when it should be a map because we're avoiding having to depend on C++ +// standard library data structures at this level of the implementation. +struct ModeImpl { + ModeImpl *Next; + const char *Mode; + XRayLogImpl Impl; +}; + +static ModeImpl SentinelModeImpl{ + nullptr, nullptr, {nullptr, nullptr, nullptr, nullptr}}; +static ModeImpl *ModeImpls = &SentinelModeImpl; +static const ModeImpl *CurrentMode = nullptr; + +} // namespace __xray + +using namespace __xray; + +void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer)) + XRAY_NEVER_INSTRUMENT { + atomic_store(&__xray::XRayBufferIterator, + reinterpret_cast<uintptr_t>(Iterator), memory_order_release); +} + +void __xray_log_remove_buffer_iterator() XRAY_NEVER_INSTRUMENT { + __xray_log_set_buffer_iterator(&NullBufferIterator); +} + +XRayLogRegisterStatus +__xray_log_register_mode(const char *Mode, + XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { + if (Impl.flush_log == nullptr || Impl.handle_arg0 == nullptr || + Impl.log_finalize == nullptr || Impl.log_init == nullptr) + return XRayLogRegisterStatus::XRAY_INCOMPLETE_IMPL; + + SpinMutexLock Guard(&XRayImplMutex); + // First, look for whether the mode already has a registered implementation. 
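+  // Registrations are pushed at the head of the list, so after registering
+  // "xray-fdr" and then "xray-basic" the walk below would visit:
+  //
+  //   ModeImpls -> ["xray-basic"] -> ["xray-fdr"] -> SentinelModeImpl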
+ for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { + if (!internal_strcmp(Mode, it->Mode)) + return XRayLogRegisterStatus::XRAY_DUPLICATE_MODE; + } + auto *NewModeImpl = static_cast<ModeImpl *>(InternalAlloc(sizeof(ModeImpl))); + NewModeImpl->Next = ModeImpls; + NewModeImpl->Mode = internal_strdup(Mode); + NewModeImpl->Impl = Impl; + ModeImpls = NewModeImpl; + return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; +} + +XRayLogRegisterStatus +__xray_log_select_mode(const char *Mode) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { + if (!internal_strcmp(Mode, it->Mode)) { + CurrentMode = it; + CurrentXRayImpl = it->Impl; + GlobalXRayImpl = &CurrentXRayImpl; + __xray_set_handler(it->Impl.handle_arg0); + return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; + } + } + return XRayLogRegisterStatus::XRAY_MODE_NOT_FOUND; +} + +const char *__xray_log_get_current_mode() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (CurrentMode != nullptr) + return CurrentMode->Mode; + return nullptr; +} + +void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { + if (Impl.log_init == nullptr || Impl.log_finalize == nullptr || + Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) { + SpinMutexLock Guard(&XRayImplMutex); + GlobalXRayImpl = nullptr; + CurrentMode = nullptr; + __xray_remove_handler(); + __xray_remove_handler_arg1(); + return; + } + + SpinMutexLock Guard(&XRayImplMutex); + CurrentXRayImpl = Impl; + GlobalXRayImpl = &CurrentXRayImpl; + __xray_set_handler(Impl.handle_arg0); +} + +void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + GlobalXRayImpl = nullptr; + __xray_remove_handler(); + __xray_remove_handler_arg1(); +} + +XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers, + void *Args, + size_t ArgsSize) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize); +} + +XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config) + XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + if (Config == nullptr) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Check first whether the current mode is the same as what we expect. + if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Here we do some work to coerce the pointer we're provided, so that + // the implementations that still take void* pointers can handle the + // data provided in the Config argument. + return GlobalXRayImpl->log_init( + 0, 0, const_cast<void *>(static_cast<const void *>(Config)), 0); +} + +XRayLogInitStatus +__xray_log_init_mode_bin(const char *Mode, const char *Config, + size_t ConfigSize) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + if (Config == nullptr) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Check first whether the current mode is the same as what we expect. 
+  if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  // Here we do some work to coerce the pointer we're provided, so that
+  // the implementations that still take void* pointers can handle the
+  // data provided in the Config argument.
+  return GlobalXRayImpl->log_init(
+      0, 0, const_cast<void *>(static_cast<const void *>(Config)), ConfigSize);
+}
+
+XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  return GlobalXRayImpl->log_finalize();
+}
+
+XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  return GlobalXRayImpl->flush_log();
+}
+
+XRayLogFlushStatus __xray_log_process_buffers(
+    void (*Processor)(const char *, XRayBuffer)) XRAY_NEVER_INSTRUMENT {
+  // Load the buffer iterator through an atomic acquire load; there is no
+  // separate mutex guarding it. This pairs with the release store in
+  // __xray_log_set_buffer_iterator, so the iterator stays stable while we
+  // walk the buffers.
+  if (!GlobalXRayImpl)
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  auto Iterator = reinterpret_cast<XRayBuffer (*)(XRayBuffer)>(
+      atomic_load(&XRayBufferIterator, memory_order_acquire));
+  auto Buffer = (*Iterator)(XRayBuffer{nullptr, 0});
+  auto Mode = CurrentMode ? CurrentMode->Mode : nullptr;
+  while (Buffer.Data != nullptr) {
+    (*Processor)(Mode, Buffer);
+    Buffer = (*Iterator)(Buffer);
+  }
+  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp
new file mode 100644
index 000000000000..26fc50374471
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp
@@ -0,0 +1,170 @@
+//===-- xray_mips.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of MIPS-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+  PO_ADDIU = 0x24000000, // addiu rt, rs, imm
+  PO_SW = 0xAC000000,    // sw rt, offset(sp)
+  PO_LUI = 0x3C000000,   // lui rt, %hi(address)
+  PO_ORI = 0x34000000,   // ori rt, rs, %lo(address)
+  PO_JALR = 0x0000F809,  // jalr rs
+  PO_LW = 0x8C000000,    // lw rt, offset(address)
+  PO_B44 = 0x1000000b,   // b #44
+  PO_NOP = 0x0,          // nop
+};
+
+enum RegNum : uint32_t {
+  RN_T0 = 0x8,
+  RN_T9 = 0x19,
+  RN_RA = 0x1F,
+  RN_SP = 0x1D,
+};
+
+inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs,
+                                         uint32_t Rt,
+                                         uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rs << 21 | Rt << 16 | Imm);
+}
+
+inline static uint32_t
+encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd,
+                         uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true,
+  // We replace the following compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //   B .tmpN
+  //   11 NOPs (44 bytes)
+  //   .tmpN
+  //   ADDIU T9, T9, 44
+  //
+  // With the following runtime patch:
+  //
+  // xray_sled_n (32-bit):
+  //   addiu sp, sp, -8                    ;create stack frame
+  //   nop
+  //   sw ra, 4(sp)                        ;save return address
+  //   sw t9, 0(sp)                        ;save register t9
+  //   lui t9, %hi(__xray_FunctionEntry/Exit)
+  //   ori t9, t9, %lo(__xray_FunctionEntry/Exit)
+  //   lui t0, %hi(function_id)
+  //   jalr t9                             ;call Tracing hook
+  //   ori t0, t0, %lo(function_id)        ;pass function id (delay slot)
+  //   lw t9, 0(sp)                        ;restore register t9
+  //   lw ra, 4(sp)                        ;restore return address
+  //   addiu sp, sp, 8                     ;delete stack frame
+  //
+  // We add 44 bytes to t9 because we want to adjust the function pointer to
+  // the actual start of the function, i.e. the address just after the nop
+  // sled. We do this because the gp displacement relocation is emitted at the
+  // start of the function, i.e. after the nop sled, and to correctly calculate
+  // the global offset table address, t9 must hold the address of the
+  // instruction containing the gp displacement relocation.
+  // FIXME: Is this correct for the static relocation model?
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
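+  //
+  // Editor's note (worked example, not from the original sources): the
+  // encoders above compose standard MIPS I-type words as
+  //   Opcode | rs << 21 | rt << 16 | imm16,
+  // so encodeInstruction(PO_SW, RN_SP, RN_RA, 0x4) yields
+  //   0xAC000000 | (29 << 21) | (31 << 16) | 4 == 0xAFBF0004,
+  // which is exactly the "sw ra, 4(sp)" from the listing above.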
+ // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #44 + + if (Enable) { + uint32_t LoTracingHookAddr = + reinterpret_cast<int32_t>(TracingHook) & 0xffff; + uint32_t HiTracingHookAddr = + (reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff; + uint32_t LoFunctionID = FuncId & 0xffff; + uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; + *reinterpret_cast<uint32_t *>(Sled.Address + 8) = encodeInstruction( + PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_RA, 0x4); + *reinterpret_cast<uint32_t *>(Sled.Address + 12) = encodeInstruction( + PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast<uint32_t *>(Sled.Address + 16) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HiTracingHookAddr); + *reinterpret_cast<uint32_t *>(Sled.Address + 20) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr); + *reinterpret_cast<uint32_t *>(Sled.Address + 24) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID); + *reinterpret_cast<uint32_t *>(Sled.Address + 28) = encodeSpecialInstruction( + PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0); + *reinterpret_cast<uint32_t *>(Sled.Address + 32) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID); + *reinterpret_cast<uint32_t *>(Sled.Address + 36) = encodeInstruction( + PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast<uint32_t *>(Sled.Address + 40) = encodeInstruction( + PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_RA, 0x4); + *reinterpret_cast<uint32_t *>(Sled.Address + 44) = encodeInstruction( + PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x8); + uint32_t CreateStackSpaceInstr = encodeInstruction( + PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xFFF8); + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address), + uint32_t(CreateStackSpaceInstr), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address), + uint32_t(PatchOpcodes::PO_B44), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips? 
+ return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp new file mode 100644 index 000000000000..62c67ff7376d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp @@ -0,0 +1,178 @@ +//===-- xray_mips64.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of MIPS64-specific routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum PatchOpcodes : uint32_t { + PO_DADDIU = 0x64000000, // daddiu rt, rs, imm + PO_SD = 0xFC000000, // sd rt, base(offset) + PO_LUI = 0x3C000000, // lui rt, imm + PO_ORI = 0x34000000, // ori rt, rs, imm + PO_DSLL = 0x00000038, // dsll rd, rt, sa + PO_JALR = 0x00000009, // jalr rs + PO_LD = 0xDC000000, // ld rt, base(offset) + PO_B60 = 0x1000000f, // b #60 + PO_NOP = 0x0, // nop +}; + +enum RegNum : uint32_t { + RN_T0 = 0xC, + RN_T9 = 0x19, + RN_RA = 0x1F, + RN_SP = 0x1D, +}; + +inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs, + uint32_t Rt, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Opcode | Rs << 21 | Rt << 16 | Imm); +} + +inline static uint32_t +encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B .tmpN + // 15 NOPs (60 bytes) + // .tmpN + // + // With the following runtime patch: + // + // xray_sled_n (64-bit): + // daddiu sp, sp, -16 ;create stack frame + // nop + // sd ra, 8(sp) ;save return address + // sd t9, 0(sp) ;save register t9 + // lui t9, %highest(__xray_FunctionEntry/Exit) + // ori t9, t9, %higher(__xray_FunctionEntry/Exit) + // dsll t9, t9, 16 + // ori t9, t9, %hi(__xray_FunctionEntry/Exit) + // dsll t9, t9, 16 + // ori t9, t9, %lo(__xray_FunctionEntry/Exit) + // lui t0, %hi(function_id) + // jalr t9 ;call Tracing hook + // ori t0, t0, %lo(function_id) ;pass function id (delay slot) + // ld t9, 0(sp) ;restore register t9 + // ld ra, 8(sp) ;restore return address + // daddiu sp, sp, 16 ;delete stack frame + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. 
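+// Editor's note (worked example, not from the original sources): patchSled
+// below materializes the 64-bit hook address in T9 with a lui/ori/dsll
+// sequence. For a hypothetical address 0x0000123456789ABC the four 16-bit
+// pieces are
+//   %highest = 0x0000, %higher = 0x1234, %hi = 0x5678, %lo = 0x9ABC,
+// and the patched code computes
+//   t9 = ((((%highest << 16) | %higher) << 16 | %hi) << 16) | %lo.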
+ // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #60 + + if (Enable) { + uint32_t LoTracingHookAddr = + reinterpret_cast<int64_t>(TracingHook) & 0xffff; + uint32_t HiTracingHookAddr = + (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff; + uint32_t HigherTracingHookAddr = + (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff; + uint32_t HighestTracingHookAddr = + (reinterpret_cast<int64_t>(TracingHook) >> 48) & 0xffff; + uint32_t LoFunctionID = FuncId & 0xffff; + uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; + *reinterpret_cast<uint32_t *>(Sled.Address + 8) = encodeInstruction( + PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_RA, 0x8); + *reinterpret_cast<uint32_t *>(Sled.Address + 12) = encodeInstruction( + PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast<uint32_t *>(Sled.Address + 16) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HighestTracingHookAddr); + *reinterpret_cast<uint32_t *>(Sled.Address + 20) = + encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, + HigherTracingHookAddr); + *reinterpret_cast<uint32_t *>(Sled.Address + 24) = encodeSpecialInstruction( + PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10); + *reinterpret_cast<uint32_t *>(Sled.Address + 28) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, HiTracingHookAddr); + *reinterpret_cast<uint32_t *>(Sled.Address + 32) = encodeSpecialInstruction( + PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10); + *reinterpret_cast<uint32_t *>(Sled.Address + 36) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr); + *reinterpret_cast<uint32_t *>(Sled.Address + 40) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID); + *reinterpret_cast<uint32_t *>(Sled.Address + 44) = encodeSpecialInstruction( + PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0); + *reinterpret_cast<uint32_t *>(Sled.Address + 48) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID); + *reinterpret_cast<uint32_t *>(Sled.Address + 52) = encodeInstruction( + PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast<uint32_t *>(Sled.Address + 56) = encodeInstruction( + PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_RA, 0x8); + *reinterpret_cast<uint32_t *>(Sled.Address + 60) = encodeInstruction( + PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x10); + uint32_t CreateStackSpace = encodeInstruction( + PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xfff0); + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address), + CreateStackSpace, std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address), + uint32_t(PatchOpcodes::PO_B60), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // 
tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips64? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips64? + return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt b/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt new file mode 100644 index 000000000000..7fa48dda7e16 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt @@ -0,0 +1,6 @@ +# List of function matchers common to C/C++ applications that make sense to +# never instrument. You can use this as an argument to +# -fxray-never-instrument=<path> along with your project-specific lists. + +# Never instrument any function whose symbol starts with __xray. +fun:__xray* diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp new file mode 100644 index 000000000000..c3553d848313 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp @@ -0,0 +1,113 @@ +//===-- xray_powerpc64.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of powerpc64 and powerpc64le routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include "xray_utils.h" +#include <atomic> +#include <cassert> +#include <cstring> + +#ifndef __LITTLE_ENDIAN__ +#error powerpc64 big endian is not supported for now. +#endif + +namespace { + +constexpr unsigned long long JumpOverInstNum = 7; + +void clearCache(void *Addr, size_t Len) { + const size_t LineSize = 32; + + const intptr_t Mask = ~(LineSize - 1); + const intptr_t StartLine = ((intptr_t)Addr) & Mask; + const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask; + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("dcbf 0, %0" : : "r"(Line)); + asm volatile("sync"); + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +} + +} // namespace + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +bool patchFunctionEntry(const bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + const uint64_t Address = Sled.address(); + if (Enable) { + // lis 0, FuncId[16..32] + // li 0, FuncId[0..15] + *reinterpret_cast<uint64_t *>(Address) = + (0x3c000000ull + (FuncId >> 16)) + + ((0x60000000ull + (FuncId & 0xffff)) << 32); + } else { + // b +JumpOverInstNum instructions. 
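+    // Editor's note (worked example, not from the original sources): the
+    // I-form branch encodes as 0x48000000 | byte_offset, and JumpOverInstNum
+    // == 7 instructions is 7 * 4 == 28 == 0x1C bytes, so the word stored
+    // below is 0x4800001C.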
+ *reinterpret_cast<uint32_t *>(Address) = + 0x48000000ull + (JumpOverInstNum << 2); + } + clearCache(reinterpret_cast<void *>(Address), 8); + return true; +} + +bool patchFunctionExit(const bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + const uint64_t Address = Sled.address(); + if (Enable) { + // lis 0, FuncId[16..32] + // li 0, FuncId[0..15] + *reinterpret_cast<uint64_t *>(Address) = + (0x3c000000ull + (FuncId >> 16)) + + ((0x60000000ull + (FuncId & 0xffff)) << 32); + } else { + // Copy the blr/b instruction after JumpOverInstNum instructions. + *reinterpret_cast<uint32_t *>(Address) = + *(reinterpret_cast<uint32_t *>(Address) + JumpOverInstNum); + } + clearCache(reinterpret_cast<void *>(Address), 8); + return true; +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchFunctionExit(Enable, FuncId, Sled); +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in powerpc64? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in powerpc64? + return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc new file mode 100644 index 000000000000..e4e16d5b28e0 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc @@ -0,0 +1,36 @@ +//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// + +#include <cstdint> +#include <mutex> +#include <sys/platform/ppc.h> + +#include "xray_defs.h" + +namespace __xray { + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + CPU = 0; + return __ppc_get_timebase(); +} + +inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + static std::mutex M; + std::lock_guard<std::mutex> Guard(M); + return __ppc_get_timebase_freq(); +} + +inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { + return true; +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp new file mode 100644 index 000000000000..bef2504f2a16 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp @@ -0,0 +1,414 @@ +//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the interface for the profileCollectorService. +// +//===----------------------------------------------------------------------===// +#include "xray_profile_collector.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray_allocator.h" +#include "xray_defs.h" +#include "xray_profiling_flags.h" +#include "xray_segmented_array.h" +#include <memory> +#include <pthread.h> +#include <utility> + +namespace __xray { +namespace profileCollectorService { + +namespace { + +SpinMutex GlobalMutex; +struct ThreadTrie { + tid_t TId; + typename std::aligned_storage<sizeof(FunctionCallTrie)>::type TrieStorage; +}; + +struct ProfileBuffer { + void *Data; + size_t Size; +}; + +// Current version of the profile format. +constexpr u64 XRayProfilingVersion = 0x20180424; + +// Identifier for XRay profiling files 'xrayprof' in hex. +constexpr u64 XRayMagicBytes = 0x7872617970726f66; + +struct XRayProfilingFileHeader { + const u64 MagicBytes = XRayMagicBytes; + const u64 Version = XRayProfilingVersion; + u64 Timestamp = 0; // System time in nanoseconds. + u64 PID = 0; // Process ID. +}; + +struct BlockHeader { + u32 BlockSize; + u32 BlockNum; + u64 ThreadId; +}; + +struct ThreadData { + BufferQueue *BQ; + FunctionCallTrie::Allocators::Buffers Buffers; + FunctionCallTrie::Allocators Allocators; + FunctionCallTrie FCT; + tid_t TId; +}; + +using ThreadDataArray = Array<ThreadData>; +using ThreadDataAllocator = ThreadDataArray::AllocatorType; + +// We use a separate buffer queue for the backing store for the allocator used +// by the ThreadData array. This lets us host the buffers, allocators, and tries +// associated with a thread by moving the data into the array instead of +// attempting to copy the data to a separately backed set of tries. +static typename std::aligned_storage< + sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage; +static BufferQueue *BQ = nullptr; +static BufferQueue::Buffer Buffer; +static typename std::aligned_storage<sizeof(ThreadDataAllocator), + alignof(ThreadDataAllocator)>::type + ThreadDataAllocatorStorage; +static typename std::aligned_storage<sizeof(ThreadDataArray), + alignof(ThreadDataArray)>::type + ThreadDataArrayStorage; + +static ThreadDataAllocator *TDAllocator = nullptr; +static ThreadDataArray *TDArray = nullptr; + +using ProfileBufferArray = Array<ProfileBuffer>; +using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType; + +// These need to be global aligned storage to avoid dynamic initialization. We +// need these to be aligned to allow us to placement new objects into the +// storage, and have pointers to those objects be appropriately aligned. +static typename std::aligned_storage<sizeof(ProfileBufferArray)>::type + ProfileBuffersStorage; +static typename std::aligned_storage<sizeof(ProfileBufferArrayAllocator)>::type + ProfileBufferArrayAllocatorStorage; + +static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; +static ProfileBufferArray *ProfileBuffers = nullptr; + +// Use a global flag to determine whether the collector implementation has been +// initialized. 
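+// Editor's note (illustrative): reset() publishes readiness with a release
+// store after its placement-new setup completes, and readers pair that with
+// an acquire load before touching the storage, e.g.
+//   if (!atomic_load(&CollectorInitialized, memory_order_acquire))
+//     return; // collector not ready; see post() below for the full bail-out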
+static atomic_uint8_t CollectorInitialized{0}; + +} // namespace + +void post(BufferQueue *Q, FunctionCallTrie &&T, + FunctionCallTrie::Allocators &&A, + FunctionCallTrie::Allocators::Buffers &&B, + tid_t TId) XRAY_NEVER_INSTRUMENT { + DCHECK_NE(Q, nullptr); + + // Bail out early if the collector has not been initialized. + if (!atomic_load(&CollectorInitialized, memory_order_acquire)) { + T.~FunctionCallTrie(); + A.~Allocators(); + Q->releaseBuffer(B.NodeBuffer); + Q->releaseBuffer(B.RootsBuffer); + Q->releaseBuffer(B.ShadowStackBuffer); + Q->releaseBuffer(B.NodeIdPairBuffer); + B.~Buffers(); + return; + } + + { + SpinMutexLock Lock(&GlobalMutex); + DCHECK_NE(TDAllocator, nullptr); + DCHECK_NE(TDArray, nullptr); + + if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T), + TId) == nullptr) { + // If we fail to add the data to the array, we should destroy the objects + // handed us. + T.~FunctionCallTrie(); + A.~Allocators(); + Q->releaseBuffer(B.NodeBuffer); + Q->releaseBuffer(B.RootsBuffer); + Q->releaseBuffer(B.ShadowStackBuffer); + Q->releaseBuffer(B.NodeIdPairBuffer); + B.~Buffers(); + } + } +} + +// A PathArray represents the function id's representing a stack trace. In this +// context a path is almost always represented from the leaf function in a call +// stack to a root of the call trie. +using PathArray = Array<int32_t>; + +struct ProfileRecord { + using PathAllocator = typename PathArray::AllocatorType; + + // The Path in this record is the function id's from the leaf to the root of + // the function call stack as represented from a FunctionCallTrie. + PathArray Path; + const FunctionCallTrie::Node *Node; +}; + +namespace { + +using ProfileRecordArray = Array<ProfileRecord>; + +// Walk a depth-first traversal of each root of the FunctionCallTrie to generate +// the path(s) and the data associated with the path. +static void +populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, + const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT { + using StackArray = Array<const FunctionCallTrie::Node *>; + using StackAllocator = typename StackArray::AllocatorType; + StackAllocator StackAlloc(profilingFlags()->stack_allocator_max); + StackArray DFSStack(StackAlloc); + for (const auto *R : Trie.getRoots()) { + DFSStack.Append(R); + while (!DFSStack.empty()) { + auto *Node = DFSStack.back(); + DFSStack.trim(1); + if (Node == nullptr) + continue; + auto Record = PRs.AppendEmplace(PathArray{PA}, Node); + if (Record == nullptr) + return; + DCHECK_NE(Record, nullptr); + + // Traverse the Node's parents and as we're doing so, get the FIds in + // the order they appear. + for (auto N = Node; N != nullptr; N = N->Parent) + Record->Path.Append(N->FId); + DCHECK(!Record->Path.empty()); + + for (const auto C : Node->Callees) + DFSStack.Append(C.NodePtr); + } + } +} + +static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header, + const ProfileRecordArray &ProfileRecords) + XRAY_NEVER_INSTRUMENT { + auto NextPtr = static_cast<uint8_t *>( + internal_memcpy(Buffer->Data, &Header, sizeof(Header))) + + sizeof(Header); + for (const auto &Record : ProfileRecords) { + // List of IDs follow: + for (const auto FId : Record.Path) + NextPtr = + static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) + + sizeof(FId); + + // Add the sentinel here. + constexpr int32_t SentinelFId = 0; + NextPtr = static_cast<uint8_t *>( + internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) + + sizeof(SentinelFId); + + // Add the node data here. 
+ NextPtr = + static_cast<uint8_t *>(internal_memcpy( + NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) + + sizeof(Record.Node->CallCount); + NextPtr = static_cast<uint8_t *>( + internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime, + sizeof(Record.Node->CumulativeLocalTime))) + + sizeof(Record.Node->CumulativeLocalTime); + } + + DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size); +} + +} // namespace + +void serialize() XRAY_NEVER_INSTRUMENT { + if (!atomic_load(&CollectorInitialized, memory_order_acquire)) + return; + + SpinMutexLock Lock(&GlobalMutex); + + // Clear out the global ProfileBuffers, if it's not empty. + for (auto &B : *ProfileBuffers) + deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size); + ProfileBuffers->trim(ProfileBuffers->size()); + + DCHECK_NE(TDArray, nullptr); + if (TDArray->empty()) + return; + + // Then repopulate the global ProfileBuffers. + u32 I = 0; + auto MaxSize = profilingFlags()->global_allocator_max; + auto ProfileArena = allocateBuffer(MaxSize); + if (ProfileArena == nullptr) + return; + + auto ProfileArenaCleanup = at_scope_exit( + [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); }); + + auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max); + if (PathArena == nullptr) + return; + + auto PathArenaCleanup = at_scope_exit( + [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); }); + + for (const auto &ThreadTrie : *TDArray) { + using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType; + ProfileRecordAllocator PRAlloc(ProfileArena, + profilingFlags()->global_allocator_max); + ProfileRecord::PathAllocator PathAlloc( + PathArena, profilingFlags()->global_allocator_max); + ProfileRecordArray ProfileRecords(PRAlloc); + + // First, we want to compute the amount of space we're going to need. We'll + // use a local allocator and an __xray::Array<...> to store the intermediary + // data, then compute the size as we're going along. Then we'll allocate the + // contiguous space to contain the thread buffer data. + if (ThreadTrie.FCT.getRoots().empty()) + continue; + + populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT); + DCHECK(!ThreadTrie.FCT.getRoots().empty()); + DCHECK(!ProfileRecords.empty()); + + // Go through each record, to compute the sizes. + // + // header size = block size (4 bytes) + // + block number (4 bytes) + // + thread id (8 bytes) + // record size = path ids (4 bytes * number of ids + sentinel 4 bytes) + // + call count (8 bytes) + // + local time (8 bytes) + // + end of record (8 bytes) + u32 CumulativeSizes = 0; + for (const auto &Record : ProfileRecords) + CumulativeSizes += 20 + (4 * Record.Path.size()); + + BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId}; + auto B = ProfileBuffers->Append({}); + B->Size = sizeof(Header) + CumulativeSizes; + B->Data = allocateBuffer(B->Size); + DCHECK_NE(B->Data, nullptr); + serializeRecords(B, Header, ProfileRecords); + } +} + +void reset() XRAY_NEVER_INSTRUMENT { + atomic_store(&CollectorInitialized, 0, memory_order_release); + SpinMutexLock Lock(&GlobalMutex); + + if (ProfileBuffers != nullptr) { + // Clear out the profile buffers that have been serialized. + for (auto &B : *ProfileBuffers) + deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size); + ProfileBuffers->trim(ProfileBuffers->size()); + ProfileBuffers = nullptr; + } + + if (TDArray != nullptr) { + // Release the resources as required. 
+ for (auto &TD : *TDArray) { + TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer); + TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer); + TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer); + TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer); + } + // We don't bother destroying the array here because we've already + // potentially freed the backing store for the array. Instead we're going to + // reset the pointer to nullptr, and re-use the storage later instead + // (placement-new'ing into the storage as-is). + TDArray = nullptr; + } + + if (TDAllocator != nullptr) { + TDAllocator->~Allocator(); + TDAllocator = nullptr; + } + + if (Buffer.Data != nullptr) { + BQ->releaseBuffer(Buffer); + } + + if (BQ == nullptr) { + bool Success = false; + new (&BufferQueueStorage) + BufferQueue(profilingFlags()->global_allocator_max, 1, Success); + if (!Success) + return; + BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); + } else { + BQ->finalize(); + + if (BQ->init(profilingFlags()->global_allocator_max, 1) != + BufferQueue::ErrorCode::Ok) + return; + } + + if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok) + return; + + new (&ProfileBufferArrayAllocatorStorage) + ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max); + ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>( + &ProfileBufferArrayAllocatorStorage); + + new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator); + ProfileBuffers = + reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage); + + new (&ThreadDataAllocatorStorage) + ThreadDataAllocator(Buffer.Data, Buffer.Size); + TDAllocator = + reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage); + new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator); + TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage); + + atomic_store(&CollectorInitialized, 1, memory_order_release); +} + +XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Lock(&GlobalMutex); + + if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0) + return {nullptr, 0}; + + static pthread_once_t Once = PTHREAD_ONCE_INIT; + static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type + FileHeaderStorage; + pthread_once( + &Once, +[]() XRAY_NEVER_INSTRUMENT { + new (&FileHeaderStorage) XRayProfilingFileHeader{}; + }); + + if (UNLIKELY(B.Data == nullptr)) { + // The first buffer should always contain the file header information. 
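+    // Editor's note (illustrative): callers drive this iterator by priming it
+    // with XRayBuffer{nullptr, 0} and calling again until Data == nullptr:
+    //   for (auto Buf = nextBuffer({nullptr, 0}); Buf.Data != nullptr;
+    //        Buf = nextBuffer(Buf))
+    //     consume(Buf); // 'consume' is a hypothetical sink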
+    auto &FileHeader =
+        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
+    FileHeader.Timestamp = NanoTime();
+    FileHeader.PID = internal_getpid();
+    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
+  }
+
+  if (UNLIKELY(B.Data == &FileHeaderStorage))
+    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
+
+  BlockHeader Header;
+  internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
+  auto NextBlock = Header.BlockNum + 1;
+  if (NextBlock < ProfileBuffers->size())
+    return {(*ProfileBuffers)[NextBlock].Data,
+            (*ProfileBuffers)[NextBlock].Size};
+  return {nullptr, 0};
+}
+
+} // namespace profileCollectorService
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h
new file mode 100644
index 000000000000..6e0f252714ba
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h
@@ -0,0 +1,73 @@
+//===-- xray_profile_collector.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the interface for a data collection service for XRay
+// profiling. What we implement here is an in-process service where
+// FunctionCallTrie instances can be handed off by threads, to be
+// consolidated/collected.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_PROFILE_COLLECTOR_H
+#define XRAY_XRAY_PROFILE_COLLECTOR_H
+
+#include "xray_function_call_trie.h"
+
+#include "xray/xray_log_interface.h"
+
+namespace __xray {
+
+/// The ProfileCollectorService implements a centralized mechanism for
+/// collecting FunctionCallTrie instances, indexed by thread ID. On demand, the
+/// ProfileCollectorService can be queried for the most recent state of the
+/// data, in a form that allows traversal.
+namespace profileCollectorService {
+
+/// Posts the FunctionCallTrie associated with a specific thread ID. This
+/// moves the collection of FunctionCallTrie, Allocators, and Buffers
+/// associated with a thread's data to the queue, takes ownership of that
+/// memory, and manages it exclusively.
+///
+void post(BufferQueue *Q, FunctionCallTrie &&T,
+          FunctionCallTrie::Allocators &&A,
+          FunctionCallTrie::Allocators::Buffers &&B, tid_t TId);
+
+/// The serialize() function will process all FunctionCallTrie instances in
+/// memory, and turn those into specifically formatted blocks, each describing
+/// the function call trie's contents in a compact form. In memory, this looks
+/// like the following layout:
+///
+///   - block size (32 bits)
+///   - block number (32 bits)
+///   - thread id (64 bits)
+///   - list of records:
+///     - function ids in leaf to root order, terminated by
+///       0 (32 bits per function id)
+///     - call count (64 bit)
+///     - cumulative local time (64 bit)
+///     - record delimiter (64 bit, 0x0)
+///
+void serialize();
+
+/// The reset function will clear out any internal memory held by the
+/// service. The intent is to have the resetting be done in calls to the
+/// initialization routine, or explicitly through the flush log API.
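+///
+/// Editor's note: as implemented in xray_profile_collector.cpp, reset() also
+/// re-primes the aligned global storage via placement-new (the BufferQueue,
+/// the profile buffer array, and their allocators), which is why no C++
+/// destructors need to run between profiling sessions.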
+void reset();
+
+/// This nextBuffer function implements the iterator functionality provided
+/// in the XRay API.
+XRayBuffer nextBuffer(XRayBuffer B);
+
+} // namespace profileCollectorService
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_PROFILE_COLLECTOR_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
new file mode 100644
index 000000000000..ef16691562cc
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
@@ -0,0 +1,519 @@
+//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This is the implementation of a profiling handler.
+//
+//===----------------------------------------------------------------------===//
+#include <memory>
+#include <time.h>
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_log_interface.h"
+#include "xray_buffer_queue.h"
+#include "xray_flags.h"
+#include "xray_profile_collector.h"
+#include "xray_profiling_flags.h"
+#include "xray_recursion_guard.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+#include <pthread.h>
+
+namespace __xray {
+
+namespace {
+
+static atomic_sint32_t ProfilerLogFlushStatus = {
+    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+static atomic_sint32_t ProfilerLogStatus = {
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+static SpinMutex ProfilerOptionsMutex;
+
+struct ProfilingData {
+  atomic_uintptr_t Allocators;
+  atomic_uintptr_t FCT;
+};
+
+static pthread_key_t ProfilingKey;
+
+// We use a global buffer queue, which gets initialized once at initialization
+// time, and gets reset when profiling is "done".
+static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
+    BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+
+thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
+                                  alignof(FunctionCallTrie::Allocators)>::type
+    AllocatorsStorage;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie),
+                                  alignof(FunctionCallTrie)>::type
+    FunctionCallTrieStorage;
+thread_local ProfilingData TLD{{0}, {0}};
+thread_local atomic_uint8_t ReentranceGuard{0};
+
+// We use a separate guard to ensure that, if this thread is already cleaning
+// up, signal handlers don't attempt to clean up or initialize the
+// thread-local data.
+thread_local atomic_uint8_t TLDInitGuard{0};
+
+// We also use a separate latch to signal that the thread is exiting, so that
+// non-essential work (like recording events) can be skipped.
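+// Editor's note (illustrative): both exit paths below (the pthread TLS
+// destructor and the Atexit handler) claim this latch via
+//   atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)
+// so whichever runs first performs the cleanup and the other becomes a no-op.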
+thread_local atomic_uint8_t ThreadExitingLatch{0}; + +static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT { + thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT { + pthread_setspecific(ProfilingKey, &TLD); + return false; + }(); + (void)ThreadOnce; + + RecursionGuard TLDInit(TLDInitGuard); + if (!TLDInit) + return nullptr; + + if (atomic_load_relaxed(&ThreadExitingLatch)) + return nullptr; + + uptr Allocators = 0; + if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1, + memory_order_acq_rel)) { + bool Success = false; + auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + atomic_store(&TLD.Allocators, 0, memory_order_release); + }); + + // Acquire a set of buffers for this thread. + if (BQ == nullptr) + return nullptr; + + if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok) + return nullptr; + auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + BQ->releaseBuffer(ThreadBuffers.NodeBuffer); + }); + + if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok) + return nullptr; + auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + BQ->releaseBuffer(ThreadBuffers.RootsBuffer); + }); + + if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) != + BufferQueue::ErrorCode::Ok) + return nullptr; + auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer); + }); + + if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) != + BufferQueue::ErrorCode::Ok) + return nullptr; + + Success = true; + new (&AllocatorsStorage) FunctionCallTrie::Allocators( + FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers)); + Allocators = reinterpret_cast<uptr>( + reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)); + atomic_store(&TLD.Allocators, Allocators, memory_order_release); + } + + if (Allocators == 1) + return nullptr; + + uptr FCT = 0; + if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) { + new (&FunctionCallTrieStorage) + FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>( + atomic_load_relaxed(&TLD.Allocators))); + FCT = reinterpret_cast<uptr>( + reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)); + atomic_store(&TLD.FCT, FCT, memory_order_release); + } + + if (FCT == 1) + return nullptr; + + return &TLD; +} + +static void cleanupTLD() XRAY_NEVER_INSTRUMENT { + auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel); + if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>( + &FunctionCallTrieStorage))) + reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie(); + + auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel); + if (Allocators == + reinterpret_cast<uptr>( + reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage))) + reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators(); +} + +static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT { + RecursionGuard TLDInit(TLDInitGuard); + if (!TLDInit) + return; + + uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel); + if (P != reinterpret_cast<uptr>( + reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage))) + return; + + auto FCT = reinterpret_cast<FunctionCallTrie *>(P); + DCHECK_NE(FCT, nullptr); + + uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel); + if (A != + reinterpret_cast<uptr>( + 
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+    return;
+
+  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
+  DCHECK_NE(Allocators, nullptr);
+
+  // Always move the data into the profile collector.
+  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
+                                std::move(ThreadBuffers), GetTid());
+
+  // Re-initialize the ThreadBuffers object to a known "default" state.
+  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
+}
+
+} // namespace
+
+const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
+  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
+#else
+  return "";
+#endif
+}
+
+XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
+  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
+      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+    if (Verbosity())
+      Report("Not flushing profiles, profiling has not been finalized.\n");
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  }
+
+  RecursionGuard SignalGuard(ReentranceGuard);
+  if (!SignalGuard) {
+    if (Verbosity())
+      Report("Cannot finalize properly inside a signal handler!\n");
+    atomic_store(&ProfilerLogFlushStatus,
+                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+                 memory_order_release);
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  }
+
+  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
+                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+                                 memory_order_acq_rel);
+  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
+    if (Verbosity())
+      Report("Not flushing profiles, implementation still flushing.\n");
+    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
+  }
+
+  // At this point, we'll create the file that will contain the profile, but
+  // only if the options say so.
+  if (!profilingFlags()->no_flush) {
+    // First check whether we have data in the profile collector service
+    // before we try and write anything down.
+    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
+    if (B.Data == nullptr) {
+      if (Verbosity())
+        Report("profiling: No data to flush.\n");
+    } else {
+      LogWriter *LW = LogWriter::Open();
+      if (LW == nullptr) {
+        if (Verbosity())
+          Report("profiling: Failed to flush to file, dropping data.\n");
+      } else {
+        // Now for each of the buffers, write out the profile data as we would
+        // see it in memory, verbatim.
+ while (B.Data != nullptr && B.Size != 0) { + LW->WriteAll(reinterpret_cast<const char *>(B.Data), + reinterpret_cast<const char *>(B.Data) + B.Size); + B = profileCollectorService::nextBuffer(B); + } + } + LogWriter::Close(LW); + } + } + + profileCollectorService::reset(); + + atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, + memory_order_release); + atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +void profilingHandleArg0(int32_t FuncId, + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + unsigned char CPU; + auto TSC = readTSC(CPU); + RecursionGuard G(ReentranceGuard); + if (!G) + return; + + auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); + if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED || + Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING)) + return; + + if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || + Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { + postCurrentThreadFCT(TLD); + return; + } + + auto T = getThreadLocalData(); + if (T == nullptr) + return; + + auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT)); + switch (Entry) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + FCT->enterFunction(FuncId, TSC, CPU); + break; + case XRayEntryType::EXIT: + case XRayEntryType::TAIL: + FCT->exitFunction(FuncId, TSC, CPU); + break; + default: + // FIXME: Handle bugs. + break; + } +} + +void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry, + uint64_t) XRAY_NEVER_INSTRUMENT { + return profilingHandleArg0(FuncId, Entry); +} + +XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; + if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_FINALIZING, + memory_order_release)) { + if (Verbosity()) + Report("Cannot finalize profile, the profiling is not initialized.\n"); + return static_cast<XRayLogInitStatus>(CurrentStatus); + } + + // Mark then finalize the current generation of buffers. This allows us to let + // the threads currently holding onto new buffers still use them, but let the + // last reference do the memory cleanup. + DCHECK_NE(BQ, nullptr); + BQ->finalize(); + + // Wait a grace period to allow threads to see that we're finalizing. + SleepForMillis(profilingFlags()->grace_period_ms); + + // If we for some reason are entering this function from an instrumented + // handler, we bail out. + RecursionGuard G(ReentranceGuard); + if (!G) + return static_cast<XRayLogInitStatus>(CurrentStatus); + + // Post the current thread's data if we have any. + postCurrentThreadFCT(TLD); + + // Then we force serialize the log data. 
+  profileCollectorService::serialize();
+
+  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
+               memory_order_release);
+  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+XRayLogInitStatus
+profilingLoggingInit(size_t, size_t, void *Options,
+                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+  RecursionGuard G(ReentranceGuard);
+  if (!G)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
+                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+                                      memory_order_acq_rel)) {
+    if (Verbosity())
+      Report("Cannot initialize an already initialized profiling "
+             "implementation.\n");
+    return static_cast<XRayLogInitStatus>(CurrentStatus);
+  }
+
+  {
+    SpinMutexLock Lock(&ProfilerOptionsMutex);
+    FlagParser ConfigParser;
+    ProfilerFlags Flags;
+    Flags.setDefaults();
+    registerProfilerFlags(&ConfigParser, &Flags);
+    ConfigParser.ParseString(profilingCompilerDefinedFlags());
+    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
+    if (Env == nullptr)
+      Env = "";
+    ConfigParser.ParseString(Env);
+
+    // Then parse the configuration string provided.
+    ConfigParser.ParseString(static_cast<const char *>(Options));
+    if (Verbosity())
+      ReportUnrecognizedFlags();
+    *profilingFlags() = Flags;
+  }
+
+  // We need to reset the profile data collection implementation now.
+  profileCollectorService::reset();
+
+  // Then also reset the buffer queue implementation.
+  if (BQ == nullptr) {
+    bool Success = false;
+    new (&BufferQueueStorage)
+        BufferQueue(profilingFlags()->per_thread_allocator_max,
+                    profilingFlags()->buffers_max, Success);
+    if (!Success) {
+      if (Verbosity())
+        Report("Failed to initialize preallocated memory buffers!\n");
+      atomic_store(&ProfilerLogStatus,
+                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                   memory_order_release);
+      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+    }
+
+    // If we've succeeded, set the global pointer to the initialized storage.
+    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+  } else {
+    BQ->finalize();
+    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
+                               profilingFlags()->buffers_max);
+
+    if (InitStatus != BufferQueue::ErrorCode::Ok) {
+      if (Verbosity())
+        Report("Failed to initialize preallocated memory buffers; error: %s\n",
+               BufferQueue::getErrorString(InitStatus));
+      atomic_store(&ProfilerLogStatus,
+                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                   memory_order_release);
+      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+    }
+
+    DCHECK(!BQ->finalizing());
+  }
+
+  // We need to set up the exit handlers.
+  static pthread_once_t Once = PTHREAD_ONCE_INIT;
+  pthread_once(
+      &Once, +[] {
+        pthread_key_create(
+            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
+              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+                return;
+
+              if (P == nullptr)
+                return;
+
+              auto T = reinterpret_cast<ProfilingData *>(P);
+              if (atomic_load_relaxed(&T->Allocators) == 0)
+                return;
+
+              {
+                // If we're somehow executing this while inside a
+                // non-reentrant-friendly context, we skip attempting to post
+                // the current thread's data.
+                RecursionGuard G(ReentranceGuard);
+                if (!G)
+                  return;
+
+                postCurrentThreadFCT(*T);
+              }
+            });
+
+        // We also need to set up an exit handler, so that we can get the
+        // profile information at exit time. We use the C API to do this, to not
+        // rely on C++ ABI functions for registering exit handlers.
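+        // (Editor's note: Atexit is sanitizer_common's thin wrapper over the
+        // C atexit(3) registration, so no __cxa_atexit support from the C++
+        // runtime is needed here.)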
+        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
+          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+            return;
+
+          auto Cleanup =
+              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
+
+          // Finalize and flush.
+          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
+              profilingFlush() != XRAY_LOG_FLUSHED)
+            return;
+
+          if (Verbosity())
+            Report("XRay Profile flushed at exit.");
+        });
+      });
+
+  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
+  __xray_set_handler(profilingHandleArg0);
+  __xray_set_handler_arg1(profilingHandleArg1);
+
+  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+               memory_order_release);
+  if (Verbosity())
+    Report("XRay Profiling init successful.\n");
+
+  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
+  // Set up the flag defaults from the static defaults and the
+  // compiler-provided defaults.
+  {
+    SpinMutexLock Lock(&ProfilerOptionsMutex);
+    auto *F = profilingFlags();
+    F->setDefaults();
+    FlagParser ProfilingParser;
+    registerProfilerFlags(&ProfilingParser, F);
+    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
+  }
+
+  XRayLogImpl Impl{
+      profilingLoggingInit,
+      profilingFinalize,
+      profilingHandleArg0,
+      profilingFlush,
+  };
+  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
+  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+    if (Verbosity())
+      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
+             "%d\n",
+             RegistrationResult);
+    return false;
+  }
+
+  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
+    __xray_log_select_mode("xray-profiling");
+  return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::profilingDynamicInitializer(); diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp new file mode 100644 index 000000000000..0e89b7420f8c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp @@ -0,0 +1,39 @@ +//===-- xray_profiling_flags.cpp --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay profiling runtime flags.
+//===----------------------------------------------------------------------===//
+
+#include "xray_profiling_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+// Storage for the profiling flags.
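+// The `_dont_use_directly` suffix follows the sanitizer_common convention:
+// callers are expected to go through the profilingFlags() accessor declared
+// in xray_profiling_flags.h instead of touching this global.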
+ProfilerFlags xray_profiling_flags_dont_use_directly; + +void ProfilerFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_profiling_flags.inc" +#undef XRAY_FLAG +} + +void registerProfilerFlags(FlagParser *P, + ProfilerFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_profiling_flags.inc" +#undef XRAY_FLAG +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h new file mode 100644 index 000000000000..d67f240adc88 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h @@ -0,0 +1,38 @@ +//===-- xray_profiling_flags.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay profiling runtime flags. +//===----------------------------------------------------------------------===// + +#ifndef XRAY_PROFILER_FLAGS_H +#define XRAY_PROFILER_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct ProfilerFlags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_profiling_flags.inc" +#undef XRAY_FLAG + + void setDefaults(); +}; + +extern ProfilerFlags xray_profiling_flags_dont_use_directly; +inline ProfilerFlags *profilingFlags() { + return &xray_profiling_flags_dont_use_directly; +} +void registerProfilerFlags(FlagParser *P, ProfilerFlags *F); + +} // namespace __xray + +#endif // XRAY_PROFILER_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc new file mode 100644 index 000000000000..4f6138872af7 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc @@ -0,0 +1,31 @@ +//===-- xray_profiling_flags.inc --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// XRay profiling runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!" +#endif + +XRAY_FLAG(uptr, per_thread_allocator_max, 16384, + "Maximum size of any single per-thread allocator.") +XRAY_FLAG(uptr, global_allocator_max, 2 << 24, + "Maximum size of the global allocator for profile storage.") +XRAY_FLAG(uptr, stack_allocator_max, 2 << 20, + "Maximum size of the traversal stack allocator.") +XRAY_FLAG(int, grace_period_ms, 1, + "Profile collection will wait this much time in milliseconds before " + "resetting the global state. 
This gives threads a chance to "
+          "notice that the profiler has been finalized and to clean up.")
+XRAY_FLAG(bool, no_flush, false,
+          "Set to true if we want the profiling implementation to not write "
+          "out files.")
+XRAY_FLAG(int, buffers_max, 128,
+          "The number of buffers to pre-allocate for use by the profiling "
+          "implementation.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h new file mode 100644 index 000000000000..3b6158a2d36c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h @@ -0,0 +1,56 @@ +//===-- xray_recursion_guard.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_RECURSION_GUARD_H
+#define XRAY_XRAY_RECURSION_GUARD_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+
+namespace __xray {
+
+/// The RecursionGuard is useful for guarding against signal handlers which are
+/// also potentially calling XRay-instrumented functions. To use the
+/// RecursionGuard, you'll typically need a thread_local atomic_uint8_t:
+///
+///   thread_local atomic_uint8_t Guard{0};
+///
+///   // In a handler function:
+///   void handleArg0(int32_t F, XRayEntryType T) {
+///     RecursionGuard G(Guard);
+///     if (!G)
+///       return; // Failed to acquire the guard.
+///     ...
+///   }
+///
+class RecursionGuard {
+  atomic_uint8_t &Running;
+  const bool Valid;
+
+public:
+  explicit inline RecursionGuard(atomic_uint8_t &R)
+      : Running(R), Valid(!atomic_exchange(&R, 1, memory_order_acq_rel)) {}
+
+  inline RecursionGuard(const RecursionGuard &) = delete;
+  inline RecursionGuard(RecursionGuard &&) = delete;
+  inline RecursionGuard &operator=(const RecursionGuard &) = delete;
+  inline RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+  explicit inline operator bool() const { return Valid; }
+
+  inline ~RecursionGuard() noexcept {
+    if (Valid)
+      atomic_store(&Running, 0, memory_order_release);
+  }
+};
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_RECURSION_GUARD_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h new file mode 100644 index 000000000000..6eb673edffea --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h @@ -0,0 +1,650 @@ +//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the implementation of a segmented array, with fixed-size segments
+// backing the array.
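+// Segments are chained as a doubly-linked list and are never relocated, so
+// appends do not move existing elements and pointers into the array stay
+// stable (unlike std::vector).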
+// +//===----------------------------------------------------------------------===// +#ifndef XRAY_SEGMENTED_ARRAY_H +#define XRAY_SEGMENTED_ARRAY_H + +#include "sanitizer_common/sanitizer_allocator.h" +#include "xray_allocator.h" +#include "xray_utils.h" +#include <cassert> +#include <type_traits> +#include <utility> + +namespace __xray { + +/// The Array type provides an interface similar to std::vector<...> but does +/// not shrink in size. Once constructed, elements can be appended but cannot be +/// removed. The implementation is heavily dependent on the contract provided by +/// the Allocator type, in that all memory will be released when the Allocator +/// is destroyed. When an Array is destroyed, it will destroy elements in the +/// backing store but will not free the memory. +template <class T> class Array { + struct Segment { + Segment *Prev; + Segment *Next; + char Data[1]; + }; + +public: + // Each segment of the array will be laid out with the following assumptions: + // + // - Each segment will be on a cache-line address boundary (kCacheLineSize + // aligned). + // + // - The elements will be accessed through an aligned pointer, dependent on + // the alignment of T. + // + // - Each element is at least two-pointers worth from the beginning of the + // Segment, aligned properly, and the rest of the elements are accessed + // through appropriate alignment. + // + // We then compute the size of the segment to follow this logic: + // + // - Compute the number of elements that can fit within + // kCacheLineSize-multiple segments, minus the size of two pointers. + // + // - Request cacheline-multiple sized elements from the allocator. + static constexpr uint64_t AlignedElementStorageSize = + sizeof(typename std::aligned_storage<sizeof(T), alignof(T)>::type); + + static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2; + + static constexpr uint64_t SegmentSize = nearest_boundary( + SegmentControlBlockSize + next_pow2(sizeof(T)), kCacheLineSize); + + using AllocatorType = Allocator<SegmentSize>; + + static constexpr uint64_t ElementsPerSegment = + (SegmentSize - SegmentControlBlockSize) / next_pow2(sizeof(T)); + + static_assert(ElementsPerSegment > 0, + "Must have at least 1 element per segment."); + + static Segment SentinelSegment; + + using size_type = uint64_t; + +private: + // This Iterator models a BidirectionalIterator. + template <class U> class Iterator { + Segment *S = &SentinelSegment; + uint64_t Offset = 0; + uint64_t Size = 0; + + public: + Iterator(Segment *IS, uint64_t Off, uint64_t S) XRAY_NEVER_INSTRUMENT + : S(IS), + Offset(Off), + Size(S) {} + Iterator(const Iterator &) NOEXCEPT XRAY_NEVER_INSTRUMENT = default; + Iterator() NOEXCEPT XRAY_NEVER_INSTRUMENT = default; + Iterator(Iterator &&) NOEXCEPT XRAY_NEVER_INSTRUMENT = default; + Iterator &operator=(const Iterator &) XRAY_NEVER_INSTRUMENT = default; + Iterator &operator=(Iterator &&) XRAY_NEVER_INSTRUMENT = default; + ~Iterator() XRAY_NEVER_INSTRUMENT = default; + + Iterator &operator++() XRAY_NEVER_INSTRUMENT { + if (++Offset % ElementsPerSegment || Offset == Size) + return *this; + + // At this point, we know that Offset % N == 0, so we must advance the + // segment pointer. 
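+      // For example, with ElementsPerSegment == 4, stepping from Offset 3 to
+      // 4 crosses into the next segment, while reaching Offset == Size (the
+      // end iterator) returns early above, so the iterator stays parked on
+      // the tail segment.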
+ DCHECK_EQ(Offset % ElementsPerSegment, 0); + DCHECK_NE(Offset, Size); + DCHECK_NE(S, &SentinelSegment); + DCHECK_NE(S->Next, &SentinelSegment); + S = S->Next; + DCHECK_NE(S, &SentinelSegment); + return *this; + } + + Iterator &operator--() XRAY_NEVER_INSTRUMENT { + DCHECK_NE(S, &SentinelSegment); + DCHECK_GT(Offset, 0); + + auto PreviousOffset = Offset--; + if (PreviousOffset != Size && PreviousOffset % ElementsPerSegment == 0) { + DCHECK_NE(S->Prev, &SentinelSegment); + S = S->Prev; + } + + return *this; + } + + Iterator operator++(int) XRAY_NEVER_INSTRUMENT { + Iterator Copy(*this); + ++(*this); + return Copy; + } + + Iterator operator--(int) XRAY_NEVER_INSTRUMENT { + Iterator Copy(*this); + --(*this); + return Copy; + } + + template <class V, class W> + friend bool operator==(const Iterator<V> &L, + const Iterator<W> &R) XRAY_NEVER_INSTRUMENT { + return L.S == R.S && L.Offset == R.Offset; + } + + template <class V, class W> + friend bool operator!=(const Iterator<V> &L, + const Iterator<W> &R) XRAY_NEVER_INSTRUMENT { + return !(L == R); + } + + U &operator*() const XRAY_NEVER_INSTRUMENT { + DCHECK_NE(S, &SentinelSegment); + auto RelOff = Offset % ElementsPerSegment; + + // We need to compute the character-aligned pointer, offset from the + // segment's Data location to get the element in the position of Offset. + auto Base = &S->Data; + auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize); + return *reinterpret_cast<U *>(AlignedOffset); + } + + U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); } + }; + + AllocatorType *Alloc; + Segment *Head; + Segment *Tail; + + // Here we keep track of segments in the freelist, to allow us to re-use + // segments when elements are trimmed off the end. + Segment *Freelist; + uint64_t Size; + + // =============================== + // In the following implementation, we work through the algorithms and the + // list operations using the following notation: + // + // - pred(s) is the predecessor (previous node accessor) and succ(s) is + // the successor (next node accessor). + // + // - S is a sentinel segment, which has the following property: + // + // pred(S) == succ(S) == S + // + // - @ is a loop operator, which can imply pred(s) == s if it appears on + // the left of s, or succ(s) == S if it appears on the right of s. + // + // - sL <-> sR : means a bidirectional relation between sL and sR, which + // means: + // + // succ(sL) == sR && pred(SR) == sL + // + // - sL -> sR : implies a unidirectional relation between sL and SR, + // with the following properties: + // + // succ(sL) == sR + // + // sL <- sR : implies a unidirectional relation between sR and sL, + // with the following properties: + // + // pred(sR) == sL + // + // =============================== + + Segment *NewSegment() XRAY_NEVER_INSTRUMENT { + // We need to handle the case in which enough elements have been trimmed to + // allow us to re-use segments we've allocated before. For this we look into + // the Freelist, to see whether we need to actually allocate new blocks or + // just re-use blocks we've already seen before. 
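+    // Reuse is cheap: popping the freelist head is a handful of pointer
+    // updates, while the fall-back path below has to go through the block
+    // allocator.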
+ if (Freelist != &SentinelSegment) { + // The current state of lists resemble something like this at this point: + // + // Freelist: @S@<-f0->...<->fN->@S@ + // ^ Freelist + // + // We want to perform a splice of `f0` from Freelist to a temporary list, + // which looks like: + // + // Templist: @S@<-f0->@S@ + // ^ FreeSegment + // + // Our algorithm preconditions are: + DCHECK_EQ(Freelist->Prev, &SentinelSegment); + + // Then the algorithm we implement is: + // + // SFS = Freelist + // Freelist = succ(Freelist) + // if (Freelist != S) + // pred(Freelist) = S + // succ(SFS) = S + // pred(SFS) = S + // + auto *FreeSegment = Freelist; + Freelist = Freelist->Next; + + // Note that we need to handle the case where Freelist is now pointing to + // S, which we don't want to be overwriting. + // TODO: Determine whether the cost of the branch is higher than the cost + // of the blind assignment. + if (Freelist != &SentinelSegment) + Freelist->Prev = &SentinelSegment; + + FreeSegment->Next = &SentinelSegment; + FreeSegment->Prev = &SentinelSegment; + + // Our postconditions are: + DCHECK_EQ(Freelist->Prev, &SentinelSegment); + DCHECK_NE(FreeSegment, &SentinelSegment); + return FreeSegment; + } + + auto SegmentBlock = Alloc->Allocate(); + if (SegmentBlock.Data == nullptr) + return nullptr; + + // Placement-new the Segment element at the beginning of the SegmentBlock. + new (SegmentBlock.Data) Segment{&SentinelSegment, &SentinelSegment, {0}}; + auto SB = reinterpret_cast<Segment *>(SegmentBlock.Data); + return SB; + } + + Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT { + DCHECK_EQ(Head, &SentinelSegment); + DCHECK_EQ(Tail, &SentinelSegment); + auto S = NewSegment(); + if (S == nullptr) + return nullptr; + DCHECK_EQ(S->Next, &SentinelSegment); + DCHECK_EQ(S->Prev, &SentinelSegment); + DCHECK_NE(S, &SentinelSegment); + Head = S; + Tail = S; + DCHECK_EQ(Head, Tail); + DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_EQ(Tail->Prev, &SentinelSegment); + return S; + } + + Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT { + auto S = NewSegment(); + if (S == nullptr) + return nullptr; + DCHECK_NE(Tail, &SentinelSegment); + DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_EQ(S->Prev, &SentinelSegment); + DCHECK_EQ(S->Next, &SentinelSegment); + S->Prev = Tail; + Tail->Next = S; + Tail = S; + DCHECK_EQ(S, S->Prev->Next); + DCHECK_EQ(Tail->Next, &SentinelSegment); + return S; + } + +public: + explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT + : Alloc(&A), + Head(&SentinelSegment), + Tail(&SentinelSegment), + Freelist(&SentinelSegment), + Size(0) {} + + Array() XRAY_NEVER_INSTRUMENT : Alloc(nullptr), + Head(&SentinelSegment), + Tail(&SentinelSegment), + Freelist(&SentinelSegment), + Size(0) {} + + Array(const Array &) = delete; + Array &operator=(const Array &) = delete; + + Array(Array &&O) XRAY_NEVER_INSTRUMENT : Alloc(O.Alloc), + Head(O.Head), + Tail(O.Tail), + Freelist(O.Freelist), + Size(O.Size) { + O.Alloc = nullptr; + O.Head = &SentinelSegment; + O.Tail = &SentinelSegment; + O.Size = 0; + O.Freelist = &SentinelSegment; + } + + Array &operator=(Array &&O) XRAY_NEVER_INSTRUMENT { + Alloc = O.Alloc; + O.Alloc = nullptr; + Head = O.Head; + O.Head = &SentinelSegment; + Tail = O.Tail; + O.Tail = &SentinelSegment; + Freelist = O.Freelist; + O.Freelist = &SentinelSegment; + Size = O.Size; + O.Size = 0; + return *this; + } + + ~Array() XRAY_NEVER_INSTRUMENT { + for (auto &E : *this) + (&E)->~T(); + } + + bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; } + + AllocatorType &allocator() const 
XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(Alloc, nullptr);
+    return *Alloc;
+  }
+
+  uint64_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
+
+  template <class... Args>
+  T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
+    DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+           (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+    if (UNLIKELY(Head == &SentinelSegment)) {
+      auto R = InitHeadAndTail();
+      if (R == nullptr)
+        return nullptr;
+    }
+
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Tail, &SentinelSegment);
+
+    auto Offset = Size % ElementsPerSegment;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewSegment() == nullptr)
+        return nullptr;
+
+    DCHECK_NE(Tail, &SentinelSegment);
+    auto Base = &Tail->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    DCHECK_LE(AlignedOffset + sizeof(T),
+              reinterpret_cast<unsigned char *>(Base) + SegmentSize);
+
+    // In-place construct at Position.
+    new (AlignedOffset) T{std::forward<Args>(args)...};
+    ++Size;
+    return reinterpret_cast<T *>(AlignedOffset);
+  }
+
+  T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
+    // FIXME: This is a duplication of AppendEmplace with the copy semantics
+    // explicitly used, as a work-around to GCC 4.8 not invoking the copy
+    // constructor with the placement new with braced-init syntax.
+    DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+           (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+    if (UNLIKELY(Head == &SentinelSegment)) {
+      auto R = InitHeadAndTail();
+      if (R == nullptr)
+        return nullptr;
+    }
+
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Tail, &SentinelSegment);
+
+    auto Offset = Size % ElementsPerSegment;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewSegment() == nullptr)
+        return nullptr;
+
+    DCHECK_NE(Tail, &SentinelSegment);
+    auto Base = &Tail->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    DCHECK_LE(AlignedOffset + sizeof(T),
+              reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+
+    // In-place construct at Position.
+    new (AlignedOffset) T(E);
+    ++Size;
+    return reinterpret_cast<T *>(AlignedOffset);
+  }
+
+  T &operator[](uint64_t Offset) const XRAY_NEVER_INSTRUMENT {
+    DCHECK_LE(Offset, Size);
+    // We need to traverse the array enough times to find the element at
+    // Offset.
+    auto S = Head;
+    while (Offset >= ElementsPerSegment) {
+      S = S->Next;
+      Offset -= ElementsPerSegment;
+      DCHECK_NE(S, &SentinelSegment);
+    }
+    auto Base = &S->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    return *reinterpret_cast<T *>(AlignedOffset);
+  }
+
+  T &front() const XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Size, 0u);
+    return *begin();
+  }
+
+  T &back() const XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(Tail, &SentinelSegment);
+    DCHECK_NE(Size, 0u);
+    auto It = end();
+    --It;
+    return *It;
+  }
+
+  template <class Predicate>
+  T *find_element(Predicate P) const XRAY_NEVER_INSTRUMENT {
+    if (empty())
+      return nullptr;
+
+    auto E = end();
+    for (auto I = begin(); I != E; ++I)
+      if (P(*I))
+        return &(*I);
+
+    return nullptr;
+  }
+
+  /// Remove N elements from the end. This leaves the underlying blocks behind,
+  /// so that appending new elements after trimming does not require allocating
+  /// new blocks.
+  void trim(uint64_t Elements) XRAY_NEVER_INSTRUMENT {
+    auto OldSize = Size;
+    Elements = Elements > Size ? 
Size : Elements; + Size -= Elements; + + // We compute the number of segments we're going to return from the tail by + // counting how many elements have been trimmed. Given the following: + // + // - Each segment has N valid positions, where N > 0 + // - The previous size > current size + // + // To compute the number of segments to return, we need to perform the + // following calculations for the number of segments required given 'x' + // elements: + // + // f(x) = { + // x == 0 : 0 + // , 0 < x <= N : 1 + // , N < x <= max : x / N + (x % N ? 1 : 0) + // } + // + // We can simplify this down to: + // + // f(x) = { + // x == 0 : 0, + // , 0 < x <= max : x / N + (x < N || x % N ? 1 : 0) + // } + // + // And further down to: + // + // f(x) = x ? x / N + (x < N || x % N ? 1 : 0) : 0 + // + // We can then perform the following calculation `s` which counts the number + // of segments we need to remove from the end of the data structure: + // + // s(p, c) = f(p) - f(c) + // + // If we treat p = previous size, and c = current size, and given the + // properties above, the possible range for s(...) is [0..max(typeof(p))/N] + // given that typeof(p) == typeof(c). + auto F = [](uint64_t X) { + return X ? (X / ElementsPerSegment) + + (X < ElementsPerSegment || X % ElementsPerSegment ? 1 : 0) + : 0; + }; + auto PS = F(OldSize); + auto CS = F(Size); + DCHECK_GE(PS, CS); + auto SegmentsToTrim = PS - CS; + for (auto I = 0uL; I < SegmentsToTrim; ++I) { + // Here we place the current tail segment to the freelist. To do this + // appropriately, we need to perform a splice operation on two + // bidirectional linked-lists. In particular, we have the current state of + // the doubly-linked list of segments: + // + // @S@ <- s0 <-> s1 <-> ... <-> sT -> @S@ + // + DCHECK_NE(Head, &SentinelSegment); + DCHECK_NE(Tail, &SentinelSegment); + DCHECK_EQ(Tail->Next, &SentinelSegment); + + if (Freelist == &SentinelSegment) { + // Our two lists at this point are in this configuration: + // + // Freelist: (potentially) @S@ + // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@ + // ^ Head ^ Tail + // + // The end state for us will be this configuration: + // + // Freelist: @S@<-sT->@S@ + // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@ + // ^ Head ^ Tail + // + // The first step for us is to hold a reference to the tail of Mainlist, + // which in our notation is represented by sT. We call this our "free + // segment" which is the segment we are placing on the Freelist. + // + // sF = sT + // + // Then, we also hold a reference to the "pre-tail" element, which we + // call sPT: + // + // sPT = pred(sT) + // + // We want to splice sT into the beginning of the Freelist, which in + // an empty Freelist means placing a segment whose predecessor and + // successor is the sentinel segment. 
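+        // That is, after the splice sT will satisfy pred(sT) == S and
+        // succ(sT) == S, since the empty Freelist is itself the sentinel.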
+ // + // The splice operation then can be performed in the following + // algorithm: + // + // succ(sPT) = S + // pred(sT) = S + // succ(sT) = Freelist + // Freelist = sT + // Tail = sPT + // + auto SPT = Tail->Prev; + SPT->Next = &SentinelSegment; + Tail->Prev = &SentinelSegment; + Tail->Next = Freelist; + Freelist = Tail; + Tail = SPT; + + // Our post-conditions here are: + DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_EQ(Freelist->Prev, &SentinelSegment); + } else { + // In the other case, where the Freelist is not empty, we perform the + // following transformation instead: + // + // This transforms the current state: + // + // Freelist: @S@<-f0->@S@ + // ^ Freelist + // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@ + // ^ Head ^ Tail + // + // Into the following: + // + // Freelist: @S@<-sT<->f0->@S@ + // ^ Freelist + // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@ + // ^ Head ^ Tail + // + // The algorithm is: + // + // sFH = Freelist + // sPT = pred(sT) + // pred(SFH) = sT + // succ(sT) = Freelist + // pred(sT) = S + // succ(sPT) = S + // Tail = sPT + // Freelist = sT + // + auto SFH = Freelist; + auto SPT = Tail->Prev; + auto ST = Tail; + SFH->Prev = ST; + ST->Next = Freelist; + ST->Prev = &SentinelSegment; + SPT->Next = &SentinelSegment; + Tail = SPT; + Freelist = ST; + + // Our post-conditions here are: + DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_EQ(Freelist->Prev, &SentinelSegment); + DCHECK_EQ(Freelist->Next->Prev, Freelist); + } + } + + // Now in case we've spliced all the segments in the end, we ensure that the + // main list is "empty", or both the head and tail pointing to the sentinel + // segment. + if (Tail == &SentinelSegment) + Head = Tail; + + DCHECK( + (Size == 0 && Head == &SentinelSegment && Tail == &SentinelSegment) || + (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment)); + DCHECK( + (Freelist != &SentinelSegment && Freelist->Prev == &SentinelSegment) || + (Freelist == &SentinelSegment && Tail->Next == &SentinelSegment)); + } + + // Provide iterators. + Iterator<T> begin() const XRAY_NEVER_INSTRUMENT { + return Iterator<T>(Head, 0, Size); + } + Iterator<T> end() const XRAY_NEVER_INSTRUMENT { + return Iterator<T>(Tail, Size, Size); + } + Iterator<const T> cbegin() const XRAY_NEVER_INSTRUMENT { + return Iterator<const T>(Head, 0, Size); + } + Iterator<const T> cend() const XRAY_NEVER_INSTRUMENT { + return Iterator<const T>(Tail, Size, Size); + } +}; + +// We need to have this storage definition out-of-line so that the compiler can +// ensure that storage for the SentinelSegment is defined and has a single +// address. 
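+// Note that SentinelSegment is deliberately self-referential: both its Prev
+// and Next point back at itself, which is exactly the pred(S) == succ(S) == S
+// property the list algorithms above rely on.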
+template <class T> +typename Array<T>::Segment Array<T>::SentinelSegment{ + &Array<T>::SentinelSegment, &Array<T>::SentinelSegment, {'\0'}}; + +} // namespace __xray + +#endif // XRAY_SEGMENTED_ARRAY_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S new file mode 100644 index 000000000000..3bf52cef60fe --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S @@ -0,0 +1,163 @@ +#include "../builtins/assembly.h" + + .text + /* The variable containing the handler function pointer */ + .global _ZN6__xray19XRayPatchedFunctionE + /* Word-aligned function entry point */ + .p2align 2 + /* Let C/C++ see the symbol */ + .global __xray_FunctionEntry + .hidden __xray_FunctionEntry + .type __xray_FunctionEntry, %function + /* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with + FuncId passed in W0 register. */ +__xray_FunctionEntry: + /* Move the return address beyond the end of sled data. The 12 bytes of + data are inserted in the code of the runtime patch, between the call + instruction and the instruction returned into. The data contains 32 + bits of instrumented function ID and 64 bits of the address of + the current trampoline. */ + ADD X30, X30, #12 + /* Push the registers which may be modified by the handler function */ + STP X1, X2, [SP, #-16]! + STP X3, X4, [SP, #-16]! + STP X5, X6, [SP, #-16]! + STP X7, X30, [SP, #-16]! + STP Q0, Q1, [SP, #-32]! + STP Q2, Q3, [SP, #-32]! + STP Q4, Q5, [SP, #-32]! + STP Q6, Q7, [SP, #-32]! + /* X8 is the indirect result register and needs to be preserved for the body + of the function to use */ + STP X8, X0, [SP, #-16]! + + /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ + ADRP X1, _ZN6__xray19XRayPatchedFunctionE + /* Load the handler function pointer into X2 */ + LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE] + /* Handler address is nullptr if handler is not set */ + CMP X2, #0 + BEQ FunctionEntry_restore + /* Function ID is already in W0 (the first parameter). + X1=0 means that we are tracing an entry event */ + MOV X1, #0 + /* Call the handler with 2 parameters in W0 and X1 */ + BLR X2 +FunctionEntry_restore: + /* Pop the saved registers */ + LDP X8, X0, [SP], #16 + LDP Q6, Q7, [SP], #32 + LDP Q4, Q5, [SP], #32 + LDP Q2, Q3, [SP], #32 + LDP Q0, Q1, [SP], #32 + LDP X7, X30, [SP], #16 + LDP X5, X6, [SP], #16 + LDP X3, X4, [SP], #16 + LDP X1, X2, [SP], #16 + RET + + /* Word-aligned function entry point */ + .p2align 2 + /* Let C/C++ see the symbol */ + .global __xray_FunctionExit + .hidden __xray_FunctionExit + .type __xray_FunctionExit, %function + /* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with + FuncId passed in W0 register. */ +__xray_FunctionExit: + /* Move the return address beyond the end of sled data. The 12 bytes of + data are inserted in the code of the runtime patch, between the call + instruction and the instruction returned into. The data contains 32 + bits of instrumented function ID and 64 bits of the address of + the current trampoline. */ + ADD X30, X30, #12 + /* Push the registers which may be modified by the handler function */ + STP X1, X2, [SP, #-16]! + STP X3, X4, [SP, #-16]! + STP X5, X6, [SP, #-16]! + STP X7, X30, [SP, #-16]! + STP Q0, Q1, [SP, #-32]! + STP Q2, Q3, [SP, #-32]! + STP Q4, Q5, [SP, #-32]! + STP Q6, Q7, [SP, #-32]! 
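+    /* Note: Q0/Q1 also carry floating-point return values under AAPCS64, so
+       saving the Q registers here preserves the instrumented function's
+       return value across the handler call. */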
+ /* X8 is the indirect result register and needs to be preserved for the body + of the function to use */ + STP X8, X0, [SP, #-16]! + + /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ + ADRP X1, _ZN6__xray19XRayPatchedFunctionE + /* Load the handler function pointer into X2 */ + LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE] + /* Handler address is nullptr if handler is not set */ + CMP X2, #0 + BEQ FunctionExit_restore + /* Function ID is already in W0 (the first parameter). + X1=1 means that we are tracing an exit event */ + MOV X1, #1 + /* Call the handler with 2 parameters in W0 and X1 */ + BLR X2 +FunctionExit_restore: + LDP X8, X0, [SP], #16 + LDP Q6, Q7, [SP], #32 + LDP Q4, Q5, [SP], #32 + LDP Q2, Q3, [SP], #32 + LDP Q0, Q1, [SP], #32 + LDP X7, X30, [SP], #16 + LDP X5, X6, [SP], #16 + LDP X3, X4, [SP], #16 + LDP X1, X2, [SP], #16 + RET + + /* Word-aligned function entry point */ + .p2align 2 + /* Let C/C++ see the symbol */ + .global __xray_FunctionTailExit + .hidden __xray_FunctionTailExit + .type __xray_FunctionTailExit, %function + /* In C++ it is void extern "C" __xray_FunctionTailExit(uint32_t FuncId) + with FuncId passed in W0 register. */ +__xray_FunctionTailExit: + /* Move the return address beyond the end of sled data. The 12 bytes of + data are inserted in the code of the runtime patch, between the call + instruction and the instruction returned into. The data contains 32 + bits of instrumented function ID and 64 bits of the address of + the current trampoline. */ + ADD X30, X30, #12 + /* Push the registers which may be modified by the handler function */ + STP X1, X2, [SP, #-16]! + STP X3, X4, [SP, #-16]! + STP X5, X6, [SP, #-16]! + STP X7, X30, [SP, #-16]! + /* Push the parameters of the tail called function */ + STP Q0, Q1, [SP, #-32]! + STP Q2, Q3, [SP, #-32]! + STP Q4, Q5, [SP, #-32]! + STP Q6, Q7, [SP, #-32]! + /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ + ADRP X1, _ZN6__xray19XRayPatchedFunctionE + /* Load the handler function pointer into X2 */ + LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE] + /* Handler address is nullptr if handler is not set */ + CMP X2, #0 + BEQ FunctionTailExit_restore + /* Function ID is already in W0 (the first parameter). 
+ X1=2 means that we are tracing a tail exit event, but before the + logging part of XRay is ready, we pretend that here a normal function + exit happens, so we give the handler code 1 */ + MOV X1, #1 + /* Call the handler with 2 parameters in W0 and X1 */ + BLR X2 +FunctionTailExit_restore: + /* Pop the parameters of the tail called function */ + LDP Q6, Q7, [SP], #32 + LDP Q4, Q5, [SP], #32 + LDP Q2, Q3, [SP], #32 + LDP Q0, Q1, [SP], #32 + /* Pop the registers which may be modified by the handler function */ + LDP X7, X30, [SP], #16 + LDP X5, X6, [SP], #16 + LDP X3, X4, [SP], #16 + LDP X1, X2, [SP], #16 + RET + +NO_EXEC_STACK_DIRECTIVE diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S new file mode 100644 index 000000000000..3ffc1e443761 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S @@ -0,0 +1,105 @@ +#include "../builtins/assembly.h" + + .syntax unified + .arch armv6t2 + .fpu vfpv2 + .code 32 + .global _ZN6__xray19XRayPatchedFunctionE + + @ Word-aligned function entry point + .p2align 2 + @ Let C/C++ see the symbol + .global __xray_FunctionEntry + .hidden __xray_FunctionEntry + @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc) + @ Assume that "q" part of the floating-point registers is not used + @ for passing parameters to C/C++ functions. + .type __xray_FunctionEntry, %function + @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with + @ FuncId passed in r0 register. +__xray_FunctionEntry: + PUSH {r1-r3,lr} + @ Save floating-point parameters of the instrumented function + VPUSH {d0-d7} + MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16) + MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12) + LDR r2, [pc, r1] + @ Handler address is nullptr if handler is not set + CMP r2, #0 + BEQ FunctionEntry_restore + @ Function ID is already in r0 (the first parameter). + @ r1=0 means that we are tracing an entry event + MOV r1, #0 + @ Call the handler with 2 parameters in r0 and r1 + BLX r2 +FunctionEntry_restore: + @ Restore floating-point parameters of the instrumented function + VPOP {d0-d7} + POP {r1-r3,pc} + + @ Word-aligned function entry point + .p2align 2 + @ Let C/C++ see the symbol + .global __xray_FunctionExit + .hidden __xray_FunctionExit + @ Assume that d1-d7 are not used for the return value. + @ Assume that "q" part of the floating-point registers is not used for the + @ return value in C/C++. + .type __xray_FunctionExit, %function + @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with + @ FuncId passed in r0 register. +__xray_FunctionExit: + PUSH {r1-r3,lr} + @ Save the floating-point return value of the instrumented function + VPUSH {d0} + @ Load the handler address + MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16) + MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12) + LDR r2, [pc, r1] + @ Handler address is nullptr if handler is not set + CMP r2, #0 + BEQ FunctionExit_restore + @ Function ID is already in r0 (the first parameter). 
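+    @ (The runtime patch loads the function ID into r0 before branching
+    @ here, so no shuffling is needed.)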
+ @ 1 means that we are tracing an exit event + MOV r1, #1 + @ Call the handler with 2 parameters in r0 and r1 + BLX r2 +FunctionExit_restore: + @ Restore the floating-point return value of the instrumented function + VPOP {d0} + POP {r1-r3,pc} + + @ Word-aligned function entry point + .p2align 2 + @ Let C/C++ see the symbol + .global __xray_FunctionTailExit + .hidden __xray_FunctionTailExit + @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc) + @ Assume that "q" part of the floating-point registers is not used + @ for passing parameters to C/C++ functions. + .type __xray_FunctionTailExit, %function + @ In C++ it is void extern "C" __xray_FunctionTailExit(uint32_t FuncId) + @ with FuncId passed in r0 register. +__xray_FunctionTailExit: + PUSH {r1-r3,lr} + @ Save floating-point parameters of the instrumented function + VPUSH {d0-d7} + MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16) + MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12) + LDR r2, [pc, r1] + @ Handler address is nullptr if handler is not set + CMP r2, #0 + BEQ FunctionTailExit_restore + @ Function ID is already in r0 (the first parameter). + @ r1=2 means that we are tracing a tail exit event + @ But before the logging part of XRay is ready, we pretend that here a + @ normal function exit happens, so we give the handler code 1 + MOV r1, #1 + @ Call the handler with 2 parameters in r0 and r1 + BLX r2 +FunctionTailExit_restore: + @ Restore floating-point parameters of the instrumented function + VPOP {d0-d7} + POP {r1-r3,pc} + +NO_EXEC_STACK_DIRECTIVE diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S new file mode 100644 index 000000000000..499c350d2a24 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S @@ -0,0 +1,109 @@ +//===-- xray_trampoline_mips.s ----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the MIPS-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + + .text + .file "xray_trampoline_mips.S" + .globl __xray_FunctionEntry + .p2align 2 + .type __xray_FunctionEntry,@function +__xray_FunctionEntry: + .cfi_startproc + .set noreorder + .cpload $t9 + .set reorder + // Save argument registers before doing any actual work + .cfi_def_cfa_offset 36 + addiu $sp, $sp, -36 + sw $ra, 32($sp) + .cfi_offset 31, -4 + sw $a3, 28($sp) + sw $a2, 24($sp) + sw $a1, 20($sp) + sw $a0, 16($sp) + sdc1 $f14, 8($sp) + sdc1 $f12, 0($sp) + + la $t9, _ZN6__xray19XRayPatchedFunctionE + lw $t9, 0($t9) + + beqz $t9, FunctionEntry_restore + + // a1=0 means that we are tracing an entry event + move $a1, $zero + // Function ID is in t0 (the first parameter). 
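+    // Move it into a0, the first argument register in the MIPS o32 calling
+    // convention, before the indirect call through t9.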
+ move $a0, $t0 + jalr $t9 + +FunctionEntry_restore: + // Restore argument registers + ldc1 $f12, 0($sp) + ldc1 $f14, 8($sp) + lw $a0, 16($sp) + lw $a1, 20($sp) + lw $a2, 24($sp) + lw $a3, 28($sp) + lw $ra, 32($sp) + addiu $sp, $sp, 36 + jr $ra +FunctionEntry_end: + .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry + .cfi_endproc + + .text + .globl __xray_FunctionExit + .p2align 2 + .type __xray_FunctionExit,@function +__xray_FunctionExit: + .cfi_startproc + .set noreorder + .cpload $t9 + .set reorder + // Save return registers before doing any actual work. + .cfi_def_cfa_offset 36 + addiu $sp, $sp, -36 + sw $ra, 32($sp) + .cfi_offset 31, -4 + sw $a1, 28($sp) + sw $a0, 24($sp) + sw $v1, 20($sp) + sw $v0, 16($sp) + sdc1 $f2, 8($sp) + sdc1 $f0, 0($sp) + + la $t9, _ZN6__xray19XRayPatchedFunctionE + lw $t9, 0($t9) + + beqz $t9, FunctionExit_restore + + // a1=1 means that we are tracing an exit event + li $a1, 1 + // Function ID is in t0 (the first parameter). + move $a0, $t0 + jalr $t9 + +FunctionExit_restore: + // Restore return registers + ldc1 $f0, 0($sp) + ldc1 $f2, 8($sp) + lw $v0, 16($sp) + lw $v1, 20($sp) + lw $a0, 24($sp) + lw $a1, 28($sp) + lw $ra, 32($sp) + addiu $sp, $sp, 36 + jr $ra + +FunctionExit_end: + .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit + .cfi_endproc diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S new file mode 100644 index 000000000000..d65bec1fc687 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S @@ -0,0 +1,135 @@ +//===-- xray_trampoline_mips64.s --------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the MIPS64-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + + .text + .file "xray_trampoline_mips64.S" + .globl __xray_FunctionEntry + .p2align 2 + .type __xray_FunctionEntry,@function +__xray_FunctionEntry: + .cfi_startproc + // Save argument registers before doing any actual work. + .cfi_def_cfa_offset 144 + daddiu $sp, $sp, -144 + sd $ra, 136($sp) + .cfi_offset 31, -8 + sd $gp, 128($sp) + sd $a7, 120($sp) + sd $a6, 112($sp) + sd $a5, 104($sp) + sd $a4, 96($sp) + sd $a3, 88($sp) + sd $a2, 80($sp) + sd $a1, 72($sp) + sd $a0, 64($sp) + sdc1 $f19, 56($sp) + sdc1 $f18, 48($sp) + sdc1 $f17, 40($sp) + sdc1 $f16, 32($sp) + sdc1 $f15, 24($sp) + sdc1 $f14, 16($sp) + sdc1 $f13, 8($sp) + sdc1 $f12, 0($sp) + + lui $gp, %hi(%neg(%gp_rel(__xray_FunctionEntry))) + daddu $gp, $gp, $t9 + daddiu $gp ,$gp, %lo(%neg(%gp_rel(__xray_FunctionEntry))) + + dla $t9, _ZN6__xray19XRayPatchedFunctionE + ld $t9, 0($t9) + + beqz $t9, FunctionEntry_restore + + // a1=0 means that we are tracing an entry event + move $a1, $zero + // Function ID is in t0 (the first parameter). 
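+    // As in the MIPS32 trampoline: shuffle the ID into a0 (the first
+    // argument register in the n64 ABI) before calling through t9.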
+ move $a0, $t0 + jalr $t9 + +FunctionEntry_restore: + // Restore argument registers + ldc1 $f12, 0($sp) + ldc1 $f13, 8($sp) + ldc1 $f14, 16($sp) + ldc1 $f15, 24($sp) + ldc1 $f16, 32($sp) + ldc1 $f17, 40($sp) + ldc1 $f18, 48($sp) + ldc1 $f19, 56($sp) + ld $a0, 64($sp) + ld $a1, 72($sp) + ld $a2, 80($sp) + ld $a3, 88($sp) + ld $a4, 96($sp) + ld $a5, 104($sp) + ld $a6, 112($sp) + ld $a7, 120($sp) + ld $gp, 128($sp) + ld $ra, 136($sp) + daddiu $sp, $sp, 144 + jr $ra +FunctionEntry_end: + .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry + .cfi_endproc + + .text + .globl __xray_FunctionExit + .p2align 2 + .type __xray_FunctionExit,@function +__xray_FunctionExit: + .cfi_startproc + // Save return registers before doing any actual work. + .cfi_def_cfa_offset 64 + daddiu $sp, $sp, -64 + sd $ra, 56($sp) + .cfi_offset 31, -8 + sd $gp, 48($sp) + sd $a0, 40($sp) + sd $v1, 32($sp) + sd $v0, 24($sp) + sdc1 $f2, 16($sp) + sdc1 $f1, 8($sp) + sdc1 $f0, 0($sp) + + lui $gp, %hi(%neg(%gp_rel(__xray_FunctionExit))) + daddu $gp, $gp, $t9 + daddiu $gp ,$gp, %lo(%neg(%gp_rel(__xray_FunctionExit))) + + dla $t9, _ZN6__xray19XRayPatchedFunctionE + ld $t9, 0($t9) + + beqz $t9, FunctionExit_restore + + // a1=1 means that we are tracing an exit event + li $a1, 1 + // Function ID is in t0 (the first parameter). + move $a0, $t0 + jalr $t9 + +FunctionExit_restore: + // Restore return registers + ldc1 $f0, 0($sp) + ldc1 $f1, 8($sp) + ldc1 $f2, 16($sp) + ld $v0, 24($sp) + ld $v1, 32($sp) + ld $a0, 40($sp) + ld $gp, 48($sp) + ld $ra, 56($sp) + daddiu $sp, $sp, 64 + jr $ra + +FunctionExit_end: + .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit + .cfi_endproc diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp new file mode 100644 index 000000000000..878c46930fee --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp @@ -0,0 +1,15 @@ +#include <atomic> +#include <xray/xray_interface.h> + +namespace __xray { + +extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction; + +// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand. +void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) { + auto fptr = __xray::XRayPatchedFunction.load(); + if (fptr != nullptr) + (*fptr)(FuncId, Type); +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S new file mode 100644 index 000000000000..250e2e5be67a --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S @@ -0,0 +1,235 @@ + .text + .abiversion 2 + .globl __xray_FunctionEntry + .p2align 4 +__xray_FunctionEntry: + std 0, 16(1) + stdu 1, -408(1) +# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers. +# If this appears to be slow, the caller needs to pass in number of generic, +# floating point, and vector parameters, so that we only spill those live ones. 
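+# Frame layout used below (offsets from r1 after the stdu): 32-88 hold
+# r3-r10, 96-192 hold f1-f13, 200-376 hold vsr34-vsr45, and 392 holds the TOC
+# pointer (r2); slot 400 is read for the FuncId below and later reused to
+# save LR.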
+ std 3, 32(1) + ld 3, 400(1) # FuncId + std 4, 40(1) + std 5, 48(1) + std 6, 56(1) + std 7, 64(1) + std 8, 72(1) + std 9, 80(1) + std 10, 88(1) + addi 4, 1, 96 + stxsdx 1, 0, 4 + addi 4, 1, 104 + stxsdx 2, 0, 4 + addi 4, 1, 112 + stxsdx 3, 0, 4 + addi 4, 1, 120 + stxsdx 4, 0, 4 + addi 4, 1, 128 + stxsdx 5, 0, 4 + addi 4, 1, 136 + stxsdx 6, 0, 4 + addi 4, 1, 144 + stxsdx 7, 0, 4 + addi 4, 1, 152 + stxsdx 8, 0, 4 + addi 4, 1, 160 + stxsdx 9, 0, 4 + addi 4, 1, 168 + stxsdx 10, 0, 4 + addi 4, 1, 176 + stxsdx 11, 0, 4 + addi 4, 1, 184 + stxsdx 12, 0, 4 + addi 4, 1, 192 + stxsdx 13, 0, 4 + addi 4, 1, 200 + stxvd2x 34, 0, 4 + addi 4, 1, 216 + stxvd2x 35, 0, 4 + addi 4, 1, 232 + stxvd2x 36, 0, 4 + addi 4, 1, 248 + stxvd2x 37, 0, 4 + addi 4, 1, 264 + stxvd2x 38, 0, 4 + addi 4, 1, 280 + stxvd2x 39, 0, 4 + addi 4, 1, 296 + stxvd2x 40, 0, 4 + addi 4, 1, 312 + stxvd2x 41, 0, 4 + addi 4, 1, 328 + stxvd2x 42, 0, 4 + addi 4, 1, 344 + stxvd2x 43, 0, 4 + addi 4, 1, 360 + stxvd2x 44, 0, 4 + addi 4, 1, 376 + stxvd2x 45, 0, 4 + std 2, 392(1) + mflr 0 + std 0, 400(1) + + li 4, 0 + bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType + nop + + addi 4, 1, 96 + lxsdx 1, 0, 4 + addi 4, 1, 104 + lxsdx 2, 0, 4 + addi 4, 1, 112 + lxsdx 3, 0, 4 + addi 4, 1, 120 + lxsdx 4, 0, 4 + addi 4, 1, 128 + lxsdx 5, 0, 4 + addi 4, 1, 136 + lxsdx 6, 0, 4 + addi 4, 1, 144 + lxsdx 7, 0, 4 + addi 4, 1, 152 + lxsdx 8, 0, 4 + addi 4, 1, 160 + lxsdx 9, 0, 4 + addi 4, 1, 168 + lxsdx 10, 0, 4 + addi 4, 1, 176 + lxsdx 11, 0, 4 + addi 4, 1, 184 + lxsdx 12, 0, 4 + addi 4, 1, 192 + lxsdx 13, 0, 4 + addi 4, 1, 200 + lxvd2x 34, 0, 4 + addi 4, 1, 216 + lxvd2x 35, 0, 4 + addi 4, 1, 232 + lxvd2x 36, 0, 4 + addi 4, 1, 248 + lxvd2x 37, 0, 4 + addi 4, 1, 264 + lxvd2x 38, 0, 4 + addi 4, 1, 280 + lxvd2x 39, 0, 4 + addi 4, 1, 296 + lxvd2x 40, 0, 4 + addi 4, 1, 312 + lxvd2x 41, 0, 4 + addi 4, 1, 328 + lxvd2x 42, 0, 4 + addi 4, 1, 344 + lxvd2x 43, 0, 4 + addi 4, 1, 360 + lxvd2x 44, 0, 4 + addi 4, 1, 376 + lxvd2x 45, 0, 4 + ld 0, 400(1) + mtlr 0 + ld 2, 392(1) + ld 3, 32(1) + ld 4, 40(1) + ld 5, 48(1) + ld 6, 56(1) + ld 7, 64(1) + ld 8, 72(1) + ld 9, 80(1) + ld 10, 88(1) + + addi 1, 1, 408 + ld 0, 16(1) + blr + + .globl __xray_FunctionExit + .p2align 4 +__xray_FunctionExit: + std 0, 16(1) + stdu 1, -256(1) +# Spill r3-r4, f1-f8, and vsr34-vsr41, which are return registers. +# If this appears to be slow, the caller needs to pass in number of generic, +# floating point, and vector parameters, so that we only spill those live ones. 
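+# Only value-return registers are spilled here; unlike the entry hook, this
+# runs after the instrumented function's body, so the argument registers no
+# longer need to be preserved.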
+ std 3, 32(1) + ld 3, 248(1) # FuncId + std 4, 40(1) + addi 4, 1, 48 + stxsdx 1, 0, 4 + addi 4, 1, 56 + stxsdx 2, 0, 4 + addi 4, 1, 64 + stxsdx 3, 0, 4 + addi 4, 1, 72 + stxsdx 4, 0, 4 + addi 4, 1, 80 + stxsdx 5, 0, 4 + addi 4, 1, 88 + stxsdx 6, 0, 4 + addi 4, 1, 96 + stxsdx 7, 0, 4 + addi 4, 1, 104 + stxsdx 8, 0, 4 + addi 4, 1, 112 + stxvd2x 34, 0, 4 + addi 4, 1, 128 + stxvd2x 35, 0, 4 + addi 4, 1, 144 + stxvd2x 36, 0, 4 + addi 4, 1, 160 + stxvd2x 37, 0, 4 + addi 4, 1, 176 + stxvd2x 38, 0, 4 + addi 4, 1, 192 + stxvd2x 39, 0, 4 + addi 4, 1, 208 + stxvd2x 40, 0, 4 + addi 4, 1, 224 + stxvd2x 41, 0, 4 + std 2, 240(1) + mflr 0 + std 0, 248(1) + + li 4, 1 + bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType + nop + + addi 4, 1, 48 + lxsdx 1, 0, 4 + addi 4, 1, 56 + lxsdx 2, 0, 4 + addi 4, 1, 64 + lxsdx 3, 0, 4 + addi 4, 1, 72 + lxsdx 4, 0, 4 + addi 4, 1, 80 + lxsdx 5, 0, 4 + addi 4, 1, 88 + lxsdx 6, 0, 4 + addi 4, 1, 96 + lxsdx 7, 0, 4 + addi 4, 1, 104 + lxsdx 8, 0, 4 + addi 4, 1, 112 + lxvd2x 34, 0, 4 + addi 4, 1, 128 + lxvd2x 35, 0, 4 + addi 4, 1, 144 + lxvd2x 36, 0, 4 + addi 4, 1, 160 + lxvd2x 37, 0, 4 + addi 4, 1, 176 + lxvd2x 38, 0, 4 + addi 4, 1, 192 + lxvd2x 39, 0, 4 + addi 4, 1, 208 + lxvd2x 40, 0, 4 + addi 4, 1, 224 + lxvd2x 41, 0, 4 + ld 0, 248(1) + mtlr 0 + ld 2, 240(1) + ld 3, 32(1) + ld 4, 40(1) + + addi 1, 1, 256 + ld 0, 16(1) + blr diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S new file mode 100644 index 000000000000..12c5a6ccd9a4 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S @@ -0,0 +1,289 @@ +//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the X86-specific assembler for the trampolines. 
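+// All of the hooks below call the C++ handler through ALIGNED_CALL_RAX, so
+// the handler is always entered with a 16-byte-aligned stack, as required by
+// the SysV x86-64 ABI.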
+// +//===----------------------------------------------------------------------===// + +#include "../builtins/assembly.h" +#include "../sanitizer_common/sanitizer_asm.h" + + + +.macro SAVE_REGISTERS + pushfq + subq $240, %rsp + CFI_DEF_CFA_OFFSET(248) + movq %rbp, 232(%rsp) + movupd %xmm0, 216(%rsp) + movupd %xmm1, 200(%rsp) + movupd %xmm2, 184(%rsp) + movupd %xmm3, 168(%rsp) + movupd %xmm4, 152(%rsp) + movupd %xmm5, 136(%rsp) + movupd %xmm6, 120(%rsp) + movupd %xmm7, 104(%rsp) + movq %rdi, 96(%rsp) + movq %rax, 88(%rsp) + movq %rdx, 80(%rsp) + movq %rsi, 72(%rsp) + movq %rcx, 64(%rsp) + movq %r8, 56(%rsp) + movq %r9, 48(%rsp) + movq %r10, 40(%rsp) + movq %r11, 32(%rsp) + movq %r12, 24(%rsp) + movq %r13, 16(%rsp) + movq %r14, 8(%rsp) + movq %r15, 0(%rsp) +.endm + +.macro RESTORE_REGISTERS + movq 232(%rsp), %rbp + movupd 216(%rsp), %xmm0 + movupd 200(%rsp), %xmm1 + movupd 184(%rsp), %xmm2 + movupd 168(%rsp), %xmm3 + movupd 152(%rsp), %xmm4 + movupd 136(%rsp), %xmm5 + movupd 120(%rsp) , %xmm6 + movupd 104(%rsp) , %xmm7 + movq 96(%rsp), %rdi + movq 88(%rsp), %rax + movq 80(%rsp), %rdx + movq 72(%rsp), %rsi + movq 64(%rsp), %rcx + movq 56(%rsp), %r8 + movq 48(%rsp), %r9 + movq 40(%rsp), %r10 + movq 32(%rsp), %r11 + movq 24(%rsp), %r12 + movq 16(%rsp), %r13 + movq 8(%rsp), %r14 + movq 0(%rsp), %r15 + addq $240, %rsp + popfq + CFI_DEF_CFA_OFFSET(8) +.endm + +.macro ALIGNED_CALL_RAX + // Call the logging handler, after aligning the stack to a 16-byte boundary. + // The approach we're taking here uses additional stack space to stash the + // stack pointer twice before aligning the pointer to 16-bytes. If the stack + // was 8-byte aligned, it will become 16-byte aligned -- when restoring the + // pointer, we can always look -8 bytes from the current position to get + // either of the values we've stashed in the first place. + pushq %rsp + pushq (%rsp) + andq $-0x10, %rsp + callq *%rax + movq 8(%rsp), %rsp +.endm + + .text +#if !defined(__APPLE__) + .section .text + .file "xray_trampoline_x86.S" +#else + .section __TEXT,__text +#endif + +//===----------------------------------------------------------------------===// + + .globl ASM_SYMBOL(__xray_FunctionEntry) + ASM_HIDDEN(__xray_FunctionEntry) + .align 16, 0x90 + ASM_TYPE_FUNCTION(__xray_FunctionEntry) +# LLVM-MCA-BEGIN __xray_FunctionEntry +ASM_SYMBOL(__xray_FunctionEntry): + CFI_STARTPROC + SAVE_REGISTERS + + // This load has to be atomic, it's concurrent with __xray_patch(). + // On x86/amd64, a simple (type-aligned) MOV instruction is enough. + movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax + testq %rax, %rax + je .Ltmp0 + + // The patched function prologue puts its xray_instr_map index into %r10d. + movl %r10d, %edi + xor %esi,%esi + ALIGNED_CALL_RAX + +.Ltmp0: + RESTORE_REGISTERS + retq +# LLVM-MCA-END + ASM_SIZE(__xray_FunctionEntry) + CFI_ENDPROC + +//===----------------------------------------------------------------------===// + + .globl ASM_SYMBOL(__xray_FunctionExit) + ASM_HIDDEN(__xray_FunctionExit) + .align 16, 0x90 + ASM_TYPE_FUNCTION(__xray_FunctionExit) +# LLVM-MCA-BEGIN __xray_FunctionExit +ASM_SYMBOL(__xray_FunctionExit): + CFI_STARTPROC + // Save the important registers first. Since we're assuming that this + // function is only jumped into, we only preserve the registers for + // returning. 
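+    // Concretely: rax and rdx (integer return values), xmm0 and xmm1
+    // (floating-point return values), plus rbp.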
+ subq $56, %rsp + CFI_DEF_CFA_OFFSET(64) + movq %rbp, 48(%rsp) + movupd %xmm0, 32(%rsp) + movupd %xmm1, 16(%rsp) + movq %rax, 8(%rsp) + movq %rdx, 0(%rsp) + movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax + testq %rax,%rax + je .Ltmp2 + + movl %r10d, %edi + movl $1, %esi + ALIGNED_CALL_RAX + +.Ltmp2: + // Restore the important registers. + movq 48(%rsp), %rbp + movupd 32(%rsp), %xmm0 + movupd 16(%rsp), %xmm1 + movq 8(%rsp), %rax + movq 0(%rsp), %rdx + addq $56, %rsp + CFI_DEF_CFA_OFFSET(8) + retq +# LLVM-MCA-END + ASM_SIZE(__xray_FunctionExit) + CFI_ENDPROC + +//===----------------------------------------------------------------------===// + + .globl ASM_SYMBOL(__xray_FunctionTailExit) + ASM_HIDDEN(__xray_FunctionTailExit) + .align 16, 0x90 + ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +# LLVM-MCA-BEGIN __xray_FunctionTailExit +ASM_SYMBOL(__xray_FunctionTailExit): + CFI_STARTPROC + SAVE_REGISTERS + + movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax + testq %rax,%rax + je .Ltmp4 + + movl %r10d, %edi + movl $2, %esi + + ALIGNED_CALL_RAX + +.Ltmp4: + RESTORE_REGISTERS + retq +# LLVM-MCA-END + ASM_SIZE(__xray_FunctionTailExit) + CFI_ENDPROC + +//===----------------------------------------------------------------------===// + + .globl ASM_SYMBOL(__xray_ArgLoggerEntry) + ASM_HIDDEN(__xray_ArgLoggerEntry) + .align 16, 0x90 + ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry) +# LLVM-MCA-BEGIN __xray_ArgLoggerEntry +ASM_SYMBOL(__xray_ArgLoggerEntry): + CFI_STARTPROC + SAVE_REGISTERS + + // Again, these function pointer loads must be atomic; MOV is fine. + movq ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax + testq %rax, %rax + jne .Larg1entryLog + + // If [arg1 logging handler] not set, defer to no-arg logging. + movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax + testq %rax, %rax + je .Larg1entryFail + +.Larg1entryLog: + + // First argument will become the third + movq %rdi, %rdx + + // XRayEntryType::LOG_ARGS_ENTRY into the second + mov $0x3, %esi + + // 32-bit function ID becomes the first + movl %r10d, %edi + ALIGNED_CALL_RAX + +.Larg1entryFail: + RESTORE_REGISTERS + retq +# LLVM-MCA-END + ASM_SIZE(__xray_ArgLoggerEntry) + CFI_ENDPROC + +//===----------------------------------------------------------------------===// + + .global ASM_SYMBOL(__xray_CustomEvent) + ASM_HIDDEN(__xray_CustomEvent) + .align 16, 0x90 + ASM_TYPE_FUNCTION(__xray_CustomEvent) +# LLVM-MCA-BEGIN __xray_CustomEvent +ASM_SYMBOL(__xray_CustomEvent): + CFI_STARTPROC + SAVE_REGISTERS + + // We take two arguments to this trampoline, which should be in rdi and rsi + // already. + movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax + testq %rax,%rax + je .LcustomEventCleanup + + ALIGNED_CALL_RAX + +.LcustomEventCleanup: + RESTORE_REGISTERS + retq +# LLVM-MCA-END + ASM_SIZE(__xray_CustomEvent) + CFI_ENDPROC + +//===----------------------------------------------------------------------===// + + .global ASM_SYMBOL(__xray_TypedEvent) + ASM_HIDDEN(__xray_TypedEvent) + .align 16, 0x90 + ASM_TYPE_FUNCTION(__xray_TypedEvent) +# LLVM-MCA-BEGIN __xray_TypedEvent +ASM_SYMBOL(__xray_TypedEvent): + CFI_STARTPROC + SAVE_REGISTERS + + // We pass three arguments to this trampoline, which should be in rdi, rsi + // and rdx without our intervention. 
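+    // SAVE_REGISTERS only spills them to the stack; rdi, rsi and rdx are not
+    // clobbered, so the arguments are still in place for the handler.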
+  movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+  testq %rax, %rax
+  je .LtypedEventCleanup
+
+  ALIGNED_CALL_RAX
+
+.LtypedEventCleanup:
+  RESTORE_REGISTERS
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_TypedEvent)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h
new file mode 100644
index 000000000000..bd7e1911abb3
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h
@@ -0,0 +1,90 @@
+//===-- xray_tsc.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_EMULATE_TSC_H
+#define XRAY_EMULATE_TSC_H
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __xray {
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+}
+
+#if SANITIZER_FUCHSIA
+#include <zircon/syscalls.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  CPU = 0;
+  return _zx_ticks_get();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  return _zx_ticks_per_second();
+}
+
+} // namespace __xray
+
+#else // SANITIZER_FUCHSIA
+
+#if defined(__x86_64__)
+#include "xray_x86_64.inc"
+#elif defined(__powerpc64__)
+#include "xray_powerpc64.inc"
+#elif defined(__arm__) || defined(__aarch64__) || defined(__mips__)
+// Emulated TSC.
+// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+// not have a constant frequency like the TSC on x86(_64); it may go faster
+// or slower depending on CPU turbo or power saving mode. Furthermore,
+// reading from CP15 on ARM requires a kernel modification or a driver,
+// which we cannot require from users of compiler-rt.
+// So on ARM we use clock_gettime(), which gives the result in nanoseconds.
+// To get the measurements per second, we scale this by the number of
+// nanoseconds per second, pretending that the TSC frequency is 1GHz and
+// one TSC tick is 1 nanosecond.
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  timespec TS;
+  int result = clock_gettime(CLOCK_REALTIME, &TS);
+  if (result != 0) {
+    Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+    TS.tv_sec = 0;
+    TS.tv_nsec = 0;
+  }
+  CPU = 0;
+  return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  return NanosecondsPerSecond;
+}
+
+} // namespace __xray
+
+#else
+#error Target architecture is not supported.
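+
+// Illustrative caller-side sketch (hypothetical; not part of this header):
+// a TSC delta is converted to seconds the same way on every port, which is
+// why the emulated version above can report a fixed 1GHz "frequency":
+//
+//   uint8_t CPU;
+//   uint64_t Start = readTSC(CPU);
+//   // ... code under measurement ...
+//   uint64_t Cycles = readTSC(CPU) - Start;
+//   double Seconds = double(Cycles) / double(getTSCFrequency());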
+#endif // CPU architecture +#endif // SANITIZER_FUCHSIA + +#endif // XRAY_EMULATE_TSC_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp new file mode 100644 index 000000000000..4c8ad5b92be7 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp @@ -0,0 +1,199 @@ +//===-- xray_utils.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// +#include "xray_utils.h" + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray_allocator.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include <cstdio> +#include <errno.h> +#include <fcntl.h> +#include <iterator> +#include <stdlib.h> +#include <sys/types.h> +#include <tuple> +#include <unistd.h> +#include <utility> + +#if SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_symbolizer_fuchsia.h" + +#include <inttypes.h> +#include <zircon/process.h> +#include <zircon/sanitizer.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#endif + +namespace __xray { + +#if SANITIZER_FUCHSIA +constexpr const char* ProfileSinkName = "llvm-xray"; + +LogWriter::~LogWriter() { + _zx_handle_close(Vmo); +} + +void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { + if (Begin == End) + return; + auto TotalBytes = std::distance(Begin, End); + + const size_t PageSize = flags()->xray_page_size_override > 0 + ? flags()->xray_page_size_override + : GetPageSizeCached(); + if (RoundUpTo(Offset, PageSize) != RoundUpTo(Offset + TotalBytes, PageSize)) { + // Resize the VMO to ensure there's sufficient space for the data. + zx_status_t Status = _zx_vmo_set_size(Vmo, Offset + TotalBytes); + if (Status != ZX_OK) { + Report("Failed to resize VMO: %s\n", _zx_status_get_string(Status)); + return; + } + } + + // Write the data into VMO. + zx_status_t Status = _zx_vmo_write(Vmo, Begin, Offset, TotalBytes); + if (Status != ZX_OK) { + Report("Failed to write: %s\n", _zx_status_get_string(Status)); + return; + } + Offset += TotalBytes; + + // Record the data size as a property of the VMO. + _zx_object_set_property(Vmo, ZX_PROP_VMO_CONTENT_SIZE, + &Offset, sizeof(Offset)); +} + +void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { + // Nothing to do here since WriteAll writes directly into the VMO. +} + +LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { + // Create VMO to hold the profile data. + zx_handle_t Vmo; + zx_status_t Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo); + if (Status != ZX_OK) { + Report("XRay: cannot create VMO: %s\n", _zx_status_get_string(Status)); + return nullptr; + } + + // Get the KOID of the current process to use in the VMO name. 
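+  // (ZX_INFO_HANDLE_BASIC fills a zx_info_handle_basic_t whose koid field
+  // is the kernel object ID of the process -- Fuchsia's analogue of a
+  // process ID -- so VMOs published by different processes get distinct
+  // names.)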
+  zx_info_handle_basic_t Info;
+  Status = _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info,
+                               sizeof(Info), NULL, NULL);
+  if (Status != ZX_OK) {
+    Report("XRay: cannot get basic info about current process handle: %s\n",
+           _zx_status_get_string(Status));
+    return nullptr;
+  }
+
+  // Give the VMO a name including our process KOID so it's easy to spot.
+  char VmoName[ZX_MAX_NAME_LEN];
+  internal_snprintf(VmoName, sizeof(VmoName), "%s.%zu", ProfileSinkName,
+                    Info.koid);
+  _zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName));
+
+  // Duplicate the handle since __sanitizer_publish_data consumes it and
+  // LogWriter needs to hold onto it.
+  zx_handle_t Handle;
+  Status = _zx_handle_duplicate(Vmo, ZX_RIGHT_SAME_RIGHTS, &Handle);
+  if (Status != ZX_OK) {
+    Report("XRay: cannot duplicate VMO handle: %s\n",
+           _zx_status_get_string(Status));
+    return nullptr;
+  }
+
+  // Publish the VMO that receives the logging. Note the VMO's contents can
+  // grow and change after publication. The contents won't be read out until
+  // after the process exits.
+  __sanitizer_publish_data(ProfileSinkName, Handle);
+
+  // Use the dumpfile symbolizer markup element to write the name of the VMO.
+  Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName);
+
+  LogWriter *LW = reinterpret_cast<LogWriter *>(InternalAlloc(sizeof(LogWriter)));
+  new (LW) LogWriter(Vmo);
+  return LW;
+}
+
+void LogWriter::Close(LogWriter *LW) {
+  LW->~LogWriter();
+  InternalFree(LW);
+}
+#else // SANITIZER_FUCHSIA
+LogWriter::~LogWriter() {
+  internal_close(Fd);
+}
+
+void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT {
+  if (Begin == End)
+    return;
+  auto TotalBytes = std::distance(Begin, End);
+  while (auto Written = write(Fd, Begin, TotalBytes)) {
+    if (Written < 0) {
+      if (errno == EINTR)
+        continue; // Try again.
+      Report("Failed to write; errno = %d\n", errno);
+      return;
+    }
+    TotalBytes -= Written;
+    if (TotalBytes == 0)
+      break;
+    Begin += Written;
+  }
+}
+
+void LogWriter::Flush() XRAY_NEVER_INSTRUMENT {
+  fsync(Fd);
+}
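+
+// Typical LogWriter lifecycle, as a non-normative sketch (the real call
+// sites live in the logging implementations that consume this helper):
+//
+//   LogWriter *LW = LogWriter::Open();
+//   if (LW != nullptr) {
+//     LW->WriteAll(Buf, Buf + Size); // Buf/Size are hypothetical here.
+//     LW->Flush();
+//     LogWriter::Close(LW);
+//   }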
"(unknown)" : Argv[0]; + const char *LastSlash = internal_strrchr(Progname, '/'); + + if (LastSlash != nullptr) + Progname = LastSlash + 1; + + int NeededLength = internal_snprintf( + TmpFilename, sizeof(TmpFilename), "%s%s.%s", + flags()->xray_logfile_base, Progname, TmpWildcardPattern); + if (NeededLength > int(sizeof(TmpFilename))) { + Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename); + return nullptr; + } + int Fd = mkstemp(TmpFilename); + if (Fd == -1) { + Report("XRay: Failed opening temporary file '%s'; not logging events.\n", + TmpFilename); + return nullptr; + } + if (Verbosity()) + Report("XRay: Log file in '%s'\n", TmpFilename); + + LogWriter *LW = allocate<LogWriter>(); + new (LW) LogWriter(Fd); + return LW; +} + +void LogWriter::Close(LogWriter *LW) { + LW->~LogWriter(); + deallocate(LW); +} +#endif // SANITIZER_FUCHSIA + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h new file mode 100644 index 000000000000..333826168c0d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h @@ -0,0 +1,85 @@ +//===-- xray_utils.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Some shared utilities for the XRay runtime implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_UTILS_H +#define XRAY_UTILS_H + +#include <cstddef> +#include <cstdint> +#include <sys/types.h> +#include <utility> + +#include "sanitizer_common/sanitizer_common.h" +#if SANITIZER_FUCHSIA +#include <zircon/types.h> +#endif + +namespace __xray { + +class LogWriter { +public: +#if SANITIZER_FUCHSIA + LogWriter(zx_handle_t Vmo) : Vmo(Vmo) {} +#else + explicit LogWriter(int Fd) : Fd(Fd) {} +#endif + ~LogWriter(); + + // Write a character range into a log. + void WriteAll(const char *Begin, const char *End); + + void Flush(); + + // Returns a new log instance initialized using the flag-provided values. + static LogWriter *Open(); + // Closes and deallocates the log instance. + static void Close(LogWriter *LogWriter); + +private: +#if SANITIZER_FUCHSIA + zx_handle_t Vmo = ZX_HANDLE_INVALID; + uint64_t Offset = 0; +#else + int Fd = -1; +#endif +}; + +constexpr size_t gcd(size_t a, size_t b) { + return (b == 0) ? a : gcd(b, a % b); +} + +constexpr size_t lcm(size_t a, size_t b) { return a * b / gcd(a, b); } + +constexpr size_t nearest_boundary(size_t number, size_t multiple) { + return multiple * ((number / multiple) + (number % multiple ? 1 : 0)); +} + +constexpr size_t next_pow2_helper(size_t num, size_t acc) { + return (1u << acc) >= num ? (1u << acc) : next_pow2_helper(num, acc + 1); +} + +constexpr size_t next_pow2(size_t number) { + return next_pow2_helper(number, 1); +} + +template <class T> constexpr T &max(T &A, T &B) { return A > B ? A : B; } + +template <class T> constexpr T &min(T &A, T &B) { return A <= B ? 
A : B; } + +constexpr ptrdiff_t diff(uintptr_t A, uintptr_t B) { + return max(A, B) - min(A, B); +} + +} // namespace __xray + +#endif // XRAY_UTILS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp new file mode 100644 index 000000000000..f3742ac71290 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp @@ -0,0 +1,359 @@ +#include "cpuid.h" +#include "sanitizer_common/sanitizer_common.h" +#if !SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_defs.h" +#include "xray_interface_internal.h" + +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC +#include <sys/types.h> +#if SANITIZER_OPENBSD +#include <sys/time.h> +#include <machine/cpu.h> +#endif +#include <sys/sysctl.h> +#elif SANITIZER_FUCHSIA +#include <zircon/syscalls.h> +#endif + +#include <atomic> +#include <cstdint> +#include <errno.h> +#include <fcntl.h> +#include <iterator> +#include <limits> +#include <tuple> +#include <unistd.h> + +namespace __xray { + +#if SANITIZER_LINUX +static std::pair<ssize_t, bool> +retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { + auto BytesToRead = std::distance(Begin, End); + ssize_t BytesRead; + ssize_t TotalBytesRead = 0; + while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { + if (BytesRead == -1) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d\n", errno); + return std::make_pair(TotalBytesRead, false); + } + + TotalBytesRead += BytesRead; + BytesToRead -= BytesRead; + Begin += BytesRead; + } + return std::make_pair(TotalBytesRead, true); +} + +static bool readValueFromFile(const char *Filename, + long long *Value) XRAY_NEVER_INSTRUMENT { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + static constexpr size_t BufSize = 256; + char Line[BufSize] = {}; + ssize_t BytesRead; + bool Success; + std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); + close(Fd); + if (!Success) + return false; + const char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + *Value = Tmp; + Result = true; + } + return Result; +} + +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + long long TSCFrequency = -1; + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &TSCFrequency)) { + TSCFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &TSCFrequency)) { + TSCFrequency *= 1000; + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } + return TSCFrequency == -1 ? 
0 : static_cast<uint64_t>(TSCFrequency); +} +#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + long long TSCFrequency = -1; + size_t tscfreqsz = sizeof(TSCFrequency); +#if SANITIZER_OPENBSD + int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ }; + if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { +#elif SANITIZER_MAC + if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, + &tscfreqsz, NULL, 0) != -1) { + +#else + if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, + NULL, 0) != -1) { +#endif + return static_cast<uint64_t>(TSCFrequency); + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } + + return 0; +} +#elif !SANITIZER_FUCHSIA +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + /* Not supported */ + return 0; +} +#endif + +static constexpr uint8_t CallOpCode = 0xe8; +static constexpr uint16_t MovR10Seq = 0xba41; +static constexpr uint16_t Jmp9Seq = 0x09eb; +static constexpr uint16_t Jmp20Seq = 0x14eb; +static constexpr uint16_t Jmp15Seq = 0x0feb; +static constexpr uint8_t JmpOpCode = 0xe9; +static constexpr uint8_t RetOpCode = 0xc3; +static constexpr uint16_t NopwSeq = 0x9066; + +static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; +static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +9 + // <9 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // call <relative 32bit offset to entry trampoline> + // + // We need to do this in the following order: + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 2-byte jmp instruction). + // 2. Put the call opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset to the trampoline's address. + const uint64_t Address = Sled.address(); + int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - + (static_cast<int64_t>(Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline, + reinterpret_cast<void *>(Address)); + return false; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; + *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // ret + // <10 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // jmp <relative 32bit offset to exit trampoline> + // + // 1. 
Put the function id first, 2 bytes from the start of the sled (just
+  //    after the 1-byte ret instruction).
+  // 2. Put the jmp opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+  //    opcode and first operand.
+  //
+  // Prerequisite is to compute the relative offset of the
+  // __xray_FunctionExit function's address.
+  const uint64_t Address = Sled.address();
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+                             (static_cast<int64_t>(Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+           __xray_FunctionExit, reinterpret_cast<void *>(Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
+    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the tail call sled with a similar
+  // sequence as the entry sled, but calling the tail exit trampoline instead.
+  const uint64_t Address = Sled.address();
+  int64_t TrampolineOffset =
+      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+      (static_cast<int64_t>(Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
+           __xray_FunctionTailExit, reinterpret_cast<void *>(Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // In Version 0:
+  //
+  // xray_sled_n:
+  //   jmp +20          // 2 bytes
+  //   ...
+  //
+  // With the following:
+  //
+  //   nopw             // 2 bytes
+  //   ...
+  //
+  //
+  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+  //
+  // ---
+  //
+  // In Version 1 or 2:
+  //
+  // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+  // to a jmp, use 15 bytes instead.
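+  //
+  // For concreteness (little-endian byte view, derived from the constants
+  // above; only the first two bytes of the sled ever change, which is why a
+  // single 16-bit atomic store suffices):
+  //
+  //   Version 1/2 sled:  eb 0f ...  (jmp +15)  <->  66 90 ...  (nopw)
+  //   Version 0 sled:    eb 14 ...  (jmp +20)  <->  66 90 ...  (nopw)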
+ // + const uint64_t Address = Sled.address(); + if (Enable) { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, + std::memory_order_release); + } else { + switch (Sled.Version) { + case 1: + case 2: + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq, + std::memory_order_release); + break; + case 0: + default: + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, + std::memory_order_release); + break; + } + } + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +20 // 2 byte instruction + // ... + // + // With the following: + // + // nopw // 2 bytes + // ... + // + // + // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. + // The 20 byte sled stashes three argument registers, calls the trampoline, + // unstashes the registers and returns. If the arguments are already in + // the correct registers, the stashing and unstashing become equivalently + // sized nops. + const uint64_t Address = Sled.address(); + if (Enable) { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, + std::memory_order_release); + } + return false; +} + +#if !SANITIZER_FUCHSIA +// We determine whether the CPU we're running on has the correct features we +// need. In x86_64 this will be rdtscp support. +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { + unsigned int EAX, EBX, ECX, EDX; + + // We check whether rdtscp support is enabled. According to the x86_64 manual, + // level should be set at 0x80000001, and we should have a look at bit 27 in + // EDX. That's 0x8000000 (or 1u << 27). + __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) + : "0"(0x80000001)); + if (!(EDX & (1u << 27))) { + Report("Missing rdtscp support.\n"); + return false; + } + // Also check whether we can determine the CPU frequency, since if we cannot, + // we should use the emulated TSC instead. + if (!getTSCFrequency()) { + Report("Unable to determine CPU frequency.\n"); + return false; + } + return true; +} +#endif + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc new file mode 100644 index 000000000000..477900355cf4 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc @@ -0,0 +1,33 @@ +//===-- xray_x86_64.inc -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. 
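+//
+// x86_64-specific definitions of readTSC() and friends, built around the
+// RDTSCP instruction; pulled into xray_tsc.h when compiling for x86_64.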
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <x86intrin.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  unsigned LongCPU;
+  unsigned long Rax, Rdx;
+  __asm__ __volatile__("rdtscp\n" : "=a"(Rax), "=d"(Rdx), "=c"(LongCPU) ::);
+  CPU = LongCPU;
+  return (Rdx << 32) + Rax;
+}
+
+uint64_t getTSCFrequency();
+
+bool probeRequiredCPUFeatures();
+
+} // namespace __xray
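+
+// Usage sketch (illustrative only, not part of the interface): RDTSCP also
+// loads IA32_TSC_AUX into ECX -- conventionally programmed by the kernel
+// with the current processor number -- which readTSC() narrows into its
+// 8-bit out-parameter, letting callers notice when two readings were taken
+// on different cores:
+//
+//   uint8_t CPU1, CPU2;
+//   uint64_t T1 = readTSC(CPU1);
+//   // ... work ...
+//   uint64_t T2 = readTSC(CPU2);
+//   if (CPU1 == CPU2) {
+//     uint64_t Nanos = (T2 - T1) * 1000000000ull / getTSCFrequency();
+//   }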