diff options
Diffstat (limited to 'lib/xray/xray_arm.cc')
-rw-r--r-- | lib/xray/xray_arm.cc | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc new file mode 100644 index 000000000000..d89322e833e5 --- /dev/null +++ b/lib/xray/xray_arm.cc @@ -0,0 +1,156 @@ +//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of ARM-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_emulate_tsc.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +namespace __xray { + +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86[_64]; it may go faster or + // slower depending on CPU's turbo or power saving modes. Furthermore, to + // read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime(2) which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + return NanosecondsPerSecond; +} + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { + PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} + PO_BlxIp = 0xE12FFF3C, // BLX ip + PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} + PO_B20 = 0xEA000005 // B #20 +}; + +// 0xUUUUWXYZ -> 0x000W0XYZ +inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return (Value & 0xfff) | ((Value & 0xf000) << 4); +} + +// 0xWXYZUUUU -> 0x000W0XYZ +inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return getMovwMask(Value >> 16); +} + +// Writes the following instructions: +// MOVW R<regNo>, #<lower 16 bits of the |Value|> +// MOVT R<regNo>, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadReg(uint8_t regNo, uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + // This is a fatal error: we cannot just report it and continue execution. + assert(regNo <= 15 && "Register number must be 0 to 15."); + // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ + *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); + Address++; + // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ + *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); + return Address + 1; +} + +// Writes the following instructions: +// MOVW r0, #<lower 16 bits of the |Value|> +// MOVT r0, #<higher 16 bits of the |Value|> +inline static uint32_t * +Write32bitLoadR0(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(0, Address, Value); +} + +// Writes the following instructions: +// MOVW ip, #<lower 16 bits of the |Value|> +// MOVT ip, #<higher 16 bits of the |Value|> +inline static uint32_t * +Write32bitLoadIP(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(12, Address, Value); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #20 + // 6 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // PUSH {r0, lr} + // MOVW r0, #<lower 16 bits of function ID> + // MOVT r0, #<higher 16 bits of function ID> + // MOVW ip, #<lower 16 bits of address of TracingHook> + // MOVT ip, #<higher 16 bits of address of TracingHook> + // BLX ip + // POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #20 + + uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address); + if (Enable) { + uint32_t *CurAddress = FirstAddress + 1; + CurAddress = + Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId)); + CurAddress = + Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook)); + *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +} // namespace __xray |