From f9102cdabba485d415359124bece145f4a7d9089 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 16 May 2017 19:47:19 +0000 Subject: Vendor import of compiler-rt trunk r303197: https://llvm.org/svn/llvm-project/compiler-rt/trunk@303197 --- CMakeLists.txt | 17 +- cmake/config-ix.cmake | 2 +- include/xray/xray_interface.h | 13 +- lib/asan/asan_allocator.h | 15 +- lib/asan/asan_flags.cc | 4 + lib/asan/tests/asan_str_test.cc | 21 ++ lib/builtins/CMakeLists.txt | 16 +- lib/builtins/adddf3.c | 9 +- lib/builtins/addsf3.c | 9 +- lib/builtins/arm/aeabi_cdcmpeq_check_nan.c | 4 +- lib/builtins/arm/aeabi_cfcmpeq_check_nan.c | 4 +- lib/builtins/arm/aeabi_div0.c | 6 +- lib/builtins/arm/aeabi_drsub.c | 4 +- lib/builtins/arm/aeabi_frsub.c | 4 +- lib/builtins/ashldi3.c | 9 +- lib/builtins/ashrdi3.c | 9 +- lib/builtins/assembly.h | 3 +- lib/builtins/comparedf2.c | 8 +- lib/builtins/comparesf2.c | 9 +- lib/builtins/divdf3.c | 9 +- lib/builtins/divsf3.c | 9 +- lib/builtins/divsi3.c | 9 +- lib/builtins/extendhfsf2.c | 9 +- lib/builtins/extendsfdf2.c | 9 +- lib/builtins/fixdfdi.c | 13 +- lib/builtins/fixdfsi.c | 9 +- lib/builtins/fixsfdi.c | 14 +- lib/builtins/fixsfsi.c | 9 +- lib/builtins/fixunsdfdi.c | 14 +- lib/builtins/fixunsdfsi.c | 9 +- lib/builtins/fixunssfdi.c | 14 +- lib/builtins/fixunssfsi.c | 9 +- lib/builtins/floatdidf.c | 9 +- lib/builtins/floatdisf.c | 9 +- lib/builtins/floatsidf.c | 9 +- lib/builtins/floatsisf.c | 9 +- lib/builtins/floatundidf.c | 9 +- lib/builtins/floatundisf.c | 9 +- lib/builtins/floatunsidf.c | 9 +- lib/builtins/floatunsisf.c | 9 +- lib/builtins/int_lib.h | 7 +- lib/builtins/lshrdi3.c | 9 +- lib/builtins/muldf3.c | 9 +- lib/builtins/muldi3.c | 9 +- lib/builtins/mulsf3.c | 9 +- lib/builtins/negdf2.c | 9 +- lib/builtins/negsf2.c | 9 +- lib/builtins/subdf3.c | 8 +- lib/builtins/subsf3.c | 8 +- lib/builtins/truncdfhf2.c | 9 +- lib/builtins/truncdfsf2.c | 9 +- lib/builtins/truncsfhf2.c | 9 +- lib/builtins/udivsi3.c | 9 +- 
lib/esan/esan_interceptors.cpp | 2 + lib/lsan/lsan_allocator.h | 16 +- lib/lsan/lsan_common_linux.cc | 6 +- lib/lsan/lsan_common_mac.cc | 5 + lib/msan/msan_allocator.cc | 31 ++- lib/msan/msan_interceptors.cc | 36 +--- lib/msan/tests/msan_test.cc | 60 +++++- .../sanitizer_allocator_internal.h | 22 +- .../sanitizer_allocator_primary32.h | 27 ++- .../sanitizer_common_interceptors.inc | 43 ++++ lib/sanitizer_common/sanitizer_flags.inc | 3 + .../sanitizer_platform_interceptors.h | 14 ++ lib/sanitizer_common/sanitizer_procmaps.h | 1 + lib/sanitizer_common/sanitizer_procmaps_linux.cc | 4 +- lib/sanitizer_common/sanitizer_procmaps_mac.cc | 87 +++++++- lib/sanitizer_common/sanitizer_stoptheworld_mac.cc | 4 + .../tests/sanitizer_allocator_test.cc | 38 ++-- lib/sanitizer_common/tests/sanitizer_test_utils.h | 6 + lib/scudo/scudo_allocator.cpp | 126 ++++++----- lib/scudo/scudo_allocator.h | 23 +- lib/scudo/scudo_allocator_combined.h | 84 ++++++++ lib/scudo/scudo_allocator_secondary.h | 101 +++------ lib/scudo/scudo_crc32.cpp | 19 +- lib/scudo/scudo_crc32.h | 101 +++++++++ lib/scudo/scudo_utils.h | 59 ----- lib/tsan/check_analyze.sh | 2 +- lib/tsan/rtl/tsan_rtl.h | 18 +- lib/xray/xray_AArch64.cc | 13 +- lib/xray/xray_arm.cc | 12 +- lib/xray/xray_fdr_log_records.h | 1 + lib/xray/xray_fdr_logging.cc | 150 +++++++++---- lib/xray/xray_fdr_logging_impl.h | 237 +++++++++++++-------- lib/xray/xray_interface.cc | 21 +- lib/xray/xray_interface_internal.h | 2 + lib/xray/xray_mips.cc | 9 +- lib/xray/xray_mips64.cc | 8 +- lib/xray/xray_powerpc64.cc | 6 + lib/xray/xray_trampoline_powerpc64_asm.S | 90 ++++++-- lib/xray/xray_trampoline_x86_64.S | 46 +++- lib/xray/xray_x86_64.cc | 36 ++++ test/asan/CMakeLists.txt | 17 +- .../Linux/sanbox_read_proc_self_maps_test.cc | 30 +++ test/asan/TestCases/Posix/strndup_oob_test.cc | 27 +++ test/asan/lit.cfg | 9 +- test/dfsan/custom.cc | 2 +- test/lsan/TestCases/link_turned_off.cc | 2 + test/lsan/TestCases/recoverable_leak_check.cc | 2 + 
test/lsan/lit.common.cfg | 5 +- test/msan/chained_origin_memcpy.cc | 2 +- test/msan/pr32842.c | 22 ++ test/msan/strndup.cc | 28 +++ test/msan/wcsncpy.cc | 8 +- test/safestack/canary.c | 3 +- .../TestCases/Linux/getpwnam_r_invalid_user.cc | 2 + .../TestCases/sanitizer_coverage_no_prune.cc | 2 +- test/ubsan/TestCases/Misc/missing_return.cpp | 2 +- test/ubsan/TestCases/TypeCheck/misaligned.cpp | 6 +- test/ubsan/lit.common.cfg | 4 + test/xray/TestCases/Linux/coverage-sample.cc | 3 - test/xray/TestCases/Linux/custom-event-logging.cc | 40 ++++ test/xray/TestCases/Linux/func-id-utils.cc | 24 ++- 114 files changed, 1685 insertions(+), 611 deletions(-) create mode 100644 lib/scudo/scudo_allocator_combined.h create mode 100644 lib/scudo/scudo_crc32.h create mode 100644 test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc create mode 100644 test/asan/TestCases/Posix/strndup_oob_test.cc create mode 100644 test/msan/pr32842.c create mode 100644 test/msan/strndup.cc create mode 100644 test/xray/TestCases/Linux/custom-event-logging.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 32358a1262c9..b522c340d669 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,9 @@ mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS) option(COMPILER_RT_BUILD_XRAY "Build xray" ON) mark_as_advanced(COMPILER_RT_BUILD_XRAY) +set(COMPILER_RT_BAREMETAL_BUILD OFF CACHE BOOLEAN + "Build for a bare-metal target.") + if (COMPILER_RT_STANDALONE_BUILD) load_llvm_config() @@ -239,14 +242,24 @@ set(COMPILER_RT_LIBCXX_PATH ${LLVM_MAIN_SRC_DIR}/projects/libcxx) if(EXISTS ${COMPILER_RT_LIBCXX_PATH}/) set(COMPILER_RT_HAS_LIBCXX_SOURCES TRUE) else() - set(COMPILER_RT_HAS_LIBCXX_SOURCES FALSE) + set(COMPILER_RT_LIBCXX_PATH ${LLVM_MAIN_SRC_DIR}/../libcxx) + if(EXISTS ${COMPILER_RT_LIBCXX_PATH}/) + set(COMPILER_RT_HAS_LIBCXX_SOURCES TRUE) + else() + set(COMPILER_RT_HAS_LIBCXX_SOURCES FALSE) + endif() endif() set(COMPILER_RT_LLD_PATH ${LLVM_MAIN_SRC_DIR}/tools/lld) if(EXISTS ${COMPILER_RT_LLD_PATH}/ AND 
LLVM_TOOL_LLD_BUILD) set(COMPILER_RT_HAS_LLD TRUE) else() - set(COMPILER_RT_HAS_LLD FALSE) + set(COMPILER_RT_LLD_PATH ${LLVM_MAIN_SRC_DIR}/../lld) + if(EXISTS ${COMPILER_RT_LLD_PATH}/) + set(COMPILER_RT_HAS_LLD TRUE) + else() + set(COMPILER_RT_HAS_LLD FALSE) + endif() endif() pythonize_bool(COMPILER_RT_HAS_LLD) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 60cb39a93b29..ae2a262a14a9 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -476,7 +476,7 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND LSAN_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux|FreeBSD") + OS_NAME MATCHES "Darwin|Linux|FreeBSD") set(COMPILER_RT_HAS_LSAN TRUE) else() set(COMPILER_RT_HAS_LSAN FALSE) diff --git a/include/xray/xray_interface.h b/include/xray/xray_interface.h index c90025e38aae..c3833f0be357 100644 --- a/include/xray/xray_interface.h +++ b/include/xray/xray_interface.h @@ -1,4 +1,4 @@ -//===-- xray_interface.h ----------------------------------------*- C++ -*-===// +//===- xray_interface.h -----------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,11 +11,12 @@ // // APIs for controlling XRay functionality explicitly. //===----------------------------------------------------------------------===// + #ifndef XRAY_XRAY_INTERFACE_H #define XRAY_XRAY_INTERFACE_H +#include #include -#include extern "C" { @@ -25,6 +26,7 @@ enum XRayEntryType { EXIT = 1, TAIL = 2, LOG_ARGS_ENTRY = 3, + CUSTOM_EVENT = 4, }; /// Provide a function to invoke for when instrumentation points are hit. This @@ -64,6 +66,9 @@ extern int __xray_set_handler_arg1(void (*)(int32_t, XRayEntryType, uint64_t)); /// Returns 1 on success, 0 on error. extern int __xray_remove_handler_arg1(); +/// Provide a function to invoke when XRay encounters a custom event. 
+extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t)); + enum XRayPatchingStatus { NOT_INITIALIZED = 0, SUCCESS = 1, @@ -96,6 +101,6 @@ extern uintptr_t __xray_function_address(int32_t FuncId); /// encounter errors (when there are no instrumented functions, etc.). extern size_t __xray_max_function_id(); -} +} // end extern "C" -#endif +#endif // XRAY_XRAY_INTERFACE_H diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h index ee28ecf98cab..ad1aeb58a86b 100644 --- a/lib/asan/asan_allocator.h +++ b/lib/asan/asan_allocator.h @@ -161,10 +161,17 @@ typedef FlatByteMap ByteMap; typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap; # endif typedef CompactSizeClassMap SizeClassMap; -typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 16, - SizeClassMap, kRegionSizeLog, - ByteMap, - AsanMapUnmapCallback> PrimaryAllocator; +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = 16; + typedef __asan::SizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = __asan::kRegionSizeLog; + typedef __asan::ByteMap ByteMap; + typedef AsanMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; +typedef SizeClassAllocator32 PrimaryAllocator; #endif // SANITIZER_CAN_USE_ALLOCATOR64 static const uptr kNumberOfSizeClasses = SizeClassMap::kNumClasses; diff --git a/lib/asan/asan_flags.cc b/lib/asan/asan_flags.cc index c8ae3faed7c2..6be0d6e94b9a 100644 --- a/lib/asan/asan_flags.cc +++ b/lib/asan/asan_flags.cc @@ -194,6 +194,10 @@ void InitializeFlags() { Report("WARNING: strchr* interceptors are enabled even though " "replace_str=0. Use intercept_strchr=0 to disable them."); } + if (!f->replace_str && common_flags()->intercept_strndup) { + Report("WARNING: strndup* interceptors are enabled even though " + "replace_str=0. 
Use intercept_strndup=0 to disable them."); + } } } // namespace __asan diff --git a/lib/asan/tests/asan_str_test.cc b/lib/asan/tests/asan_str_test.cc index c790088f8f9e..8f4911fd9ff8 100644 --- a/lib/asan/tests/asan_str_test.cc +++ b/lib/asan/tests/asan_str_test.cc @@ -154,6 +154,27 @@ TEST(AddressSanitizer, MAYBE_StrDupOOBTest) { free(str); } +#if SANITIZER_TEST_HAS_STRNDUP +TEST(AddressSanitizer, MAYBE_StrNDupOOBTest) { + size_t size = Ident(42); + char *str = MallocAndMemsetString(size); + char *new_str; + // Normal strndup calls. + str[size - 1] = '\0'; + new_str = strndup(str, size - 13); + free(new_str); + new_str = strndup(str + size - 1, 13); + free(new_str); + // Argument points to not allocated memory. + EXPECT_DEATH(Ident(strndup(str - 1, 13)), LeftOOBReadMessage(1)); + EXPECT_DEATH(Ident(strndup(str + size, 13)), RightOOBReadMessage(0)); + // Overwrite the terminating '\0' and hit unallocated memory. + str[size - 1] = 'z'; + EXPECT_DEATH(Ident(strndup(str, size + 13)), RightOOBReadMessage(0)); + free(str); +} +#endif // SANITIZER_TEST_HAS_STRNDUP + TEST(AddressSanitizer, StrCpyOOBTest) { size_t to_size = Ident(30); size_t from_size = Ident(6); // less than to_size diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt index e3779ca79ceb..df80a50444e5 100644 --- a/lib/builtins/CMakeLists.txt +++ b/lib/builtins/CMakeLists.txt @@ -66,7 +66,6 @@ set(GENERIC_SOURCES divti3.c divtf3.c divxc3.c - enable_execute_stack.c eprintf.c extendsfdf2.c extendhfsf2.c @@ -191,6 +190,12 @@ option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN "Skip the atomic builtin (this may be needed if system headers are unavailable)" Off) +if(NOT COMPILER_RT_BAREMETAL_BUILD) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + enable_execute_stack.c) +endif() + if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN) set(GENERIC_SOURCES ${GENERIC_SOURCES} @@ -478,11 +483,18 @@ else () foreach (arch ${BUILTIN_SUPPORTED_ARCH}) if (CAN_TARGET_${arch}) + # NOTE: some 
architectures (e.g. i386) have multiple names. Ensure that + # we catch them all. + set(_arch ${arch}) + if("${arch}" STREQUAL "i686") + set(_arch "i386|i686") + endif() + # Filter out generic versions of routines that are re-implemented in # architecture specific manner. This prevents multiple definitions of the # same symbols, making the symbol selection non-deterministic. foreach (_file ${${arch}_SOURCES}) - if (${_file} MATCHES ${arch}/*) + if (${_file} MATCHES ${_arch}/*) get_filename_component(_name ${_file} NAME) string(REPLACE ".S" ".c" _cname "${_name}") list(REMOVE_ITEM ${arch}_SOURCES ${_cname}) diff --git a/lib/builtins/adddf3.c b/lib/builtins/adddf3.c index 8b7aae0a6f87..c528e9e21f51 100644 --- a/lib/builtins/adddf3.c +++ b/lib/builtins/adddf3.c @@ -15,8 +15,13 @@ #define DOUBLE_PRECISION #include "fp_add_impl.inc" -ARM_EABI_FNALIAS(dadd, adddf3) - COMPILER_RT_ABI double __adddf3(double a, double b){ return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_dadd(double a, double b) { + return __adddf3(a, b); +} +#endif + diff --git a/lib/builtins/addsf3.c b/lib/builtins/addsf3.c index 0f5d6ea4097a..fe570687a25e 100644 --- a/lib/builtins/addsf3.c +++ b/lib/builtins/addsf3.c @@ -15,8 +15,13 @@ #define SINGLE_PRECISION #include "fp_add_impl.inc" -ARM_EABI_FNALIAS(fadd, addsf3) - COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_fadd(float a, float b) { + return __addsf3(a, b); +} +#endif + diff --git a/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c index 577f6b2c5535..7578433a1df7 100644 --- a/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c +++ b/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include +#include "../int_lib.h" -__attribute__((pcs("aapcs"))) -__attribute__((visibility("hidden"))) +AEABI_RTABI 
__attribute__((visibility("hidden"))) int __aeabi_cdcmpeq_check_nan(double a, double b) { return __builtin_isnan(a) || __builtin_isnan(b); } diff --git a/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c index 992e31fbd8d6..43dde9a49597 100644 --- a/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c +++ b/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include +#include "../int_lib.h" -__attribute__((pcs("aapcs"))) -__attribute__((visibility("hidden"))) +AEABI_RTABI __attribute__((visibility("hidden"))) int __aeabi_cfcmpeq_check_nan(float a, float b) { return __builtin_isnan(a) || __builtin_isnan(b); } diff --git a/lib/builtins/arm/aeabi_div0.c b/lib/builtins/arm/aeabi_div0.c index ccc95fa5c12e..dc3031326e37 100644 --- a/lib/builtins/arm/aeabi_div0.c +++ b/lib/builtins/arm/aeabi_div0.c @@ -26,16 +26,18 @@ * line. */ +#include "../int_lib.h" + /* provide an unused declaration to pacify pendantic compilation */ extern unsigned char declaration; #if defined(__ARM_EABI__) -int __attribute__((weak)) __attribute__((visibility("hidden"))) +AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden"))) __aeabi_idiv0(int return_value) { return return_value; } -long long __attribute__((weak)) __attribute__((visibility("hidden"))) +AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden"))) __aeabi_ldiv0(long long return_value) { return return_value; } diff --git a/lib/builtins/arm/aeabi_drsub.c b/lib/builtins/arm/aeabi_drsub.c index fc17d5a4cc76..1254886086fb 100644 --- a/lib/builtins/arm/aeabi_drsub.c +++ b/lib/builtins/arm/aeabi_drsub.c @@ -10,10 +10,10 @@ #define DOUBLE_PRECISION #include "../fp_lib.h" -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t); -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); } diff --git a/lib/builtins/arm/aeabi_frsub.c 
b/lib/builtins/arm/aeabi_frsub.c index 64258dc7e070..34f2303745bc 100644 --- a/lib/builtins/arm/aeabi_frsub.c +++ b/lib/builtins/arm/aeabi_frsub.c @@ -10,10 +10,10 @@ #define SINGLE_PRECISION #include "../fp_lib.h" -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t); -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); } diff --git a/lib/builtins/ashldi3.c b/lib/builtins/ashldi3.c index eb4698ac517c..fcb0abdb1fce 100644 --- a/lib/builtins/ashldi3.c +++ b/lib/builtins/ashldi3.c @@ -18,8 +18,6 @@ /* Precondition: 0 <= b < bits_in_dword */ -ARM_EABI_FNALIAS(llsl, ashldi3) - COMPILER_RT_ABI di_int __ashldi3(di_int a, si_int b) { @@ -41,3 +39,10 @@ __ashldi3(di_int a, si_int b) } return result.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) { + return __ashldi3(a, b); +} +#endif + diff --git a/lib/builtins/ashrdi3.c b/lib/builtins/ashrdi3.c index 14c878bb7793..b4ab4c617ba0 100644 --- a/lib/builtins/ashrdi3.c +++ b/lib/builtins/ashrdi3.c @@ -18,8 +18,6 @@ /* Precondition: 0 <= b < bits_in_dword */ -ARM_EABI_FNALIAS(lasr, ashrdi3) - COMPILER_RT_ABI di_int __ashrdi3(di_int a, si_int b) { @@ -42,3 +40,10 @@ __ashrdi3(di_int a, si_int b) } return result.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) { + return __ashrdi3(a, b); +} +#endif + diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h index 29d9f8844a6a..12c13c495509 100644 --- a/lib/builtins/assembly.h +++ b/lib/builtins/assembly.h @@ -44,7 +44,8 @@ #endif #define CONST_SECTION .section .rodata -#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__) +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits #else #define NO_EXEC_STACK_DIRECTIVE diff --git a/lib/builtins/comparedf2.c b/lib/builtins/comparedf2.c index 9e29752231e9..c5bb169d0021 100644 
--- a/lib/builtins/comparedf2.c +++ b/lib/builtins/comparedf2.c @@ -113,8 +113,6 @@ __gedf2(fp_t a, fp_t b) { } } -ARM_EABI_FNALIAS(dcmpun, unorddf2) - COMPILER_RT_ABI int __unorddf2(fp_t a, fp_t b) { const rep_t aAbs = toRep(a) & absMask; @@ -144,3 +142,9 @@ __gtdf2(fp_t a, fp_t b) { return __gedf2(a, b); } +#if defined(__ARM_EABI__) +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { + return __unorddf2(a, b); +} +#endif + diff --git a/lib/builtins/comparesf2.c b/lib/builtins/comparesf2.c index 1fd50636abaf..4badb5e1b9f7 100644 --- a/lib/builtins/comparesf2.c +++ b/lib/builtins/comparesf2.c @@ -113,8 +113,6 @@ __gesf2(fp_t a, fp_t b) { } } -ARM_EABI_FNALIAS(fcmpun, unordsf2) - COMPILER_RT_ABI int __unordsf2(fp_t a, fp_t b) { const rep_t aAbs = toRep(a) & absMask; @@ -143,3 +141,10 @@ COMPILER_RT_ABI enum GE_RESULT __gtsf2(fp_t a, fp_t b) { return __gesf2(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { + return __unordsf2(a, b); +} +#endif + diff --git a/lib/builtins/divdf3.c b/lib/builtins/divdf3.c index ab44c2b25fe5..492e32b851e9 100644 --- a/lib/builtins/divdf3.c +++ b/lib/builtins/divdf3.c @@ -19,8 +19,6 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(ddiv, divdf3) - COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { @@ -183,3 +181,10 @@ __divdf3(fp_t a, fp_t b) { return result; } } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { + return __divdf3(a, b); +} +#endif + diff --git a/lib/builtins/divsf3.c b/lib/builtins/divsf3.c index de2e376125b6..aa6289a6d70a 100644 --- a/lib/builtins/divsf3.c +++ b/lib/builtins/divsf3.c @@ -19,8 +19,6 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(fdiv, divsf3) - COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { @@ -167,3 +165,10 @@ __divsf3(fp_t a, fp_t b) { return fromRep(absResult | quotientSign); } } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { + return __divsf3(a, b); +} +#endif + diff --git 
a/lib/builtins/divsi3.c b/lib/builtins/divsi3.c index bab4aefda30a..3852e3990b5b 100644 --- a/lib/builtins/divsi3.c +++ b/lib/builtins/divsi3.c @@ -16,8 +16,6 @@ /* Returns: a / b */ -ARM_EABI_FNALIAS(idiv, divsi3) - COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b) { @@ -35,3 +33,10 @@ __divsi3(si_int a, si_int b) */ return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ } + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) { + return __divsi3(a, b); +} +#endif + diff --git a/lib/builtins/extendhfsf2.c b/lib/builtins/extendhfsf2.c index 27115a48c184..e7d9fde8abfc 100644 --- a/lib/builtins/extendhfsf2.c +++ b/lib/builtins/extendhfsf2.c @@ -12,8 +12,6 @@ #define DST_SINGLE #include "fp_extend_impl.inc" -ARM_EABI_FNALIAS(h2f, extendhfsf2) - // Use a forwarding definition and noinline to implement a poor man's alias, // as there isn't a good cross-platform way of defining one. COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { @@ -23,3 +21,10 @@ COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { return __extendhfsf2(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_h2f(uint16_t a) { + return __extendhfsf2(a); +} +#endif + diff --git a/lib/builtins/extendsfdf2.c b/lib/builtins/extendsfdf2.c index 7a267c2f47ad..b9e7a7471a98 100644 --- a/lib/builtins/extendsfdf2.c +++ b/lib/builtins/extendsfdf2.c @@ -12,8 +12,13 @@ #define DST_DOUBLE #include "fp_extend_impl.inc" -ARM_EABI_FNALIAS(f2d, extendsfdf2) - COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_f2d(float a) { + return __extendsfdf2(a); +} +#endif + diff --git a/lib/builtins/fixdfdi.c b/lib/builtins/fixdfdi.c index 14283ef42e61..31d76df28255 100644 --- a/lib/builtins/fixdfdi.c +++ b/lib/builtins/fixdfdi.c @@ -10,7 +10,6 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(d2lz, fixdfdi) #ifndef 
__SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid @@ -44,3 +43,15 @@ __fixdfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int +#if defined(__SOFT_FP__) +__aeabi_d2lz(fp_t a) { +#else +__aeabi_d2lz(double a) { +#endif + return __fixdfdi(a); +} +#endif + diff --git a/lib/builtins/fixdfsi.c b/lib/builtins/fixdfsi.c index 704e65bc43a1..fc316dcd0545 100644 --- a/lib/builtins/fixdfsi.c +++ b/lib/builtins/fixdfsi.c @@ -14,9 +14,14 @@ typedef si_int fixint_t; typedef su_int fixuint_t; #include "fp_fixint_impl.inc" -ARM_EABI_FNALIAS(d2iz, fixdfsi) - COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { + return __fixdfsi(a); +} +#endif + diff --git a/lib/builtins/fixsfdi.c b/lib/builtins/fixsfdi.c index fab47e272a25..c43473637d60 100644 --- a/lib/builtins/fixsfdi.c +++ b/lib/builtins/fixsfdi.c @@ -11,8 +11,6 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(f2lz, fixsfdi) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid * flag as a side-effect of computation. 
@@ -45,3 +43,15 @@ __fixsfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int +#if defined(__SOFT_FP__) +__aeabi_f2lz(fp_t a) { +#else +__aeabi_f2lz(float a) { +#endif + return __fixsfdi(a); +} +#endif + diff --git a/lib/builtins/fixsfsi.c b/lib/builtins/fixsfsi.c index f045536d6857..3276df966460 100644 --- a/lib/builtins/fixsfsi.c +++ b/lib/builtins/fixsfsi.c @@ -14,9 +14,14 @@ typedef si_int fixint_t; typedef su_int fixuint_t; #include "fp_fixint_impl.inc" -ARM_EABI_FNALIAS(f2iz, fixsfsi) - COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_f2iz(fp_t a) { + return __fixsfsi(a); +} +#endif + diff --git a/lib/builtins/fixunsdfdi.c b/lib/builtins/fixunsdfdi.c index 4b0bc9e1d051..b734409709bf 100644 --- a/lib/builtins/fixunsdfdi.c +++ b/lib/builtins/fixunsdfdi.c @@ -11,8 +11,6 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(d2ulz, fixunsdfdi) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid * flag as a side-effect of computation. 
@@ -42,3 +40,15 @@ __fixunsdfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI du_int +#if defined(__SOFT_FP__) +__aeabi_d2ulz(fp_t a) { +#else +__aeabi_d2ulz(double a) { +#endif + return __fixunsdfdi(a); +} +#endif + diff --git a/lib/builtins/fixunsdfsi.c b/lib/builtins/fixunsdfsi.c index 232d342d77da..bb3d8e0f831b 100644 --- a/lib/builtins/fixunsdfsi.c +++ b/lib/builtins/fixunsdfsi.c @@ -13,9 +13,14 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -ARM_EABI_FNALIAS(d2uiz, fixunsdfsi) - COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { + return __fixunsdfsi(a); +} +#endif + diff --git a/lib/builtins/fixunssfdi.c b/lib/builtins/fixunssfdi.c index f8ebab854f95..5d92245df0d9 100644 --- a/lib/builtins/fixunssfdi.c +++ b/lib/builtins/fixunssfdi.c @@ -11,8 +11,6 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(f2ulz, fixunssfdi) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid * flag as a side-effect of computation. 
@@ -43,3 +41,15 @@ __fixunssfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI du_int +#if defined(__SOFT_FP__) +__aeabi_f2ulz(fp_t a) { +#else +__aeabi_f2ulz(float a) { +#endif + return __fixunssfdi(a); +} +#endif + diff --git a/lib/builtins/fixunssfsi.c b/lib/builtins/fixunssfsi.c index cc2b05bd84f8..91d5e8ae5d7f 100644 --- a/lib/builtins/fixunssfsi.c +++ b/lib/builtins/fixunssfsi.c @@ -17,9 +17,14 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -ARM_EABI_FNALIAS(f2uiz, fixunssfsi) - COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { + return __fixunssfsi(a); +} +#endif + diff --git a/lib/builtins/floatdidf.c b/lib/builtins/floatdidf.c index 2b023ad08beb..fccb29072407 100644 --- a/lib/builtins/floatdidf.c +++ b/lib/builtins/floatdidf.c @@ -22,8 +22,6 @@ /* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ -ARM_EABI_FNALIAS(l2d, floatdidf) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; we'll set the inexact flag * as a side-effect of this computation. 
@@ -105,3 +103,10 @@ __floatdidf(di_int a) return fb.f; } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_l2d(di_int a) { + return __floatdidf(a); +} +#endif + diff --git a/lib/builtins/floatdisf.c b/lib/builtins/floatdisf.c index 3e47580ef576..dd548165c373 100644 --- a/lib/builtins/floatdisf.c +++ b/lib/builtins/floatdisf.c @@ -22,8 +22,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(l2f, floatdisf) - COMPILER_RT_ABI float __floatdisf(di_int a) { @@ -78,3 +76,10 @@ __floatdisf(di_int a) ((su_int)a & 0x007FFFFF); /* mantissa */ return fb.f; } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_l2f(di_int a) { + return __floatdisf(a); +} +#endif + diff --git a/lib/builtins/floatsidf.c b/lib/builtins/floatsidf.c index 1cf99b782a60..2ae395bdc1db 100644 --- a/lib/builtins/floatsidf.c +++ b/lib/builtins/floatsidf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(i2d, floatsidf) - COMPILER_RT_ABI fp_t __floatsidf(int a) { @@ -51,3 +49,10 @@ __floatsidf(int a) { // Insert the sign bit and return return fromRep(result | sign); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_i2d(int a) { + return __floatsidf(a); +} +#endif + diff --git a/lib/builtins/floatsisf.c b/lib/builtins/floatsisf.c index 467dd1d1eaf1..08891fcdf201 100644 --- a/lib/builtins/floatsisf.c +++ b/lib/builtins/floatsisf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(i2f, floatsisf) - COMPILER_RT_ABI fp_t __floatsisf(int a) { @@ -57,3 +55,10 @@ __floatsisf(int a) { // Insert the sign bit and return return fromRep(result | sign); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_i2f(int a) { + return __floatsisf(a); +} +#endif + diff --git a/lib/builtins/floatundidf.c b/lib/builtins/floatundidf.c index cfd3a7a3b33f..6c1a931ef2f3 100644 --- a/lib/builtins/floatundidf.c +++ b/lib/builtins/floatundidf.c @@ -22,8 +22,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ul2d, floatundidf) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; we'll
set the inexact flag * as a side-effect of this computation. @@ -104,3 +102,10 @@ __floatundidf(du_int a) return fb.f; } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_ul2d(du_int a) { + return __floatundidf(a); +} +#endif + diff --git a/lib/builtins/floatundisf.c b/lib/builtins/floatundisf.c index 713a44abc8bd..86841a75dc66 100644 --- a/lib/builtins/floatundisf.c +++ b/lib/builtins/floatundisf.c @@ -22,8 +22,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ul2f, floatundisf) - COMPILER_RT_ABI float __floatundisf(du_int a) { @@ -75,3 +73,10 @@ __floatundisf(du_int a) ((su_int)a & 0x007FFFFF); /* mantissa */ return fb.f; } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_ul2f(du_int a) { + return __floatundisf(a); +} +#endif + diff --git a/lib/builtins/floatunsidf.c b/lib/builtins/floatunsidf.c index 445e18041c48..8d4807194f0b 100644 --- a/lib/builtins/floatunsidf.c +++ b/lib/builtins/floatunsidf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ui2d, floatunsidf) - COMPILER_RT_ABI fp_t __floatunsidf(unsigned int a) { @@ -40,3 +38,10 @@ __floatunsidf(unsigned int a) { result += (rep_t)(exponent + exponentBias) << significandBits; return fromRep(result); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) { + return __floatunsidf(a); +} +#endif + diff --git a/lib/builtins/floatunsisf.c b/lib/builtins/floatunsisf.c index ea6f161adc02..f194c046d2fb 100644 --- a/lib/builtins/floatunsisf.c +++ b/lib/builtins/floatunsisf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ui2f, floatunsisf) - COMPILER_RT_ABI fp_t __floatunsisf(unsigned int a) { @@ -48,3 +46,10 @@ __floatunsisf(unsigned int a) { result += (rep_t)(exponent + exponentBias) << significandBits; return fromRep(result); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) { + return __floatunsisf(a); +} +#endif + diff --git a/lib/builtins/int_lib.h b/lib/builtins/int_lib.h index 8a202dde70f1..9a8092d50d8e 100644 --- 
a/lib/builtins/int_lib.h +++ b/lib/builtins/int_lib.h @@ -30,18 +30,17 @@ /* ABI macro definitions */ #if __ARM_EABI__ -# define ARM_EABI_FNALIAS(aeabi_name, name) \ - void __aeabi_##aeabi_name() __attribute__((alias("__" #name))); # ifdef COMPILER_RT_ARMHF_TARGET # define COMPILER_RT_ABI # else -# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) +# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) # endif #else -# define ARM_EABI_FNALIAS(aeabi_name, name) # define COMPILER_RT_ABI #endif +#define AEABI_RTABI __attribute__((__pcs__("aapcs"))) + #ifdef _MSC_VER #define ALWAYS_INLINE __forceinline #define NOINLINE __declspec(noinline) diff --git a/lib/builtins/lshrdi3.c b/lib/builtins/lshrdi3.c index 6b1ea923b778..becbbef4eb09 100644 --- a/lib/builtins/lshrdi3.c +++ b/lib/builtins/lshrdi3.c @@ -18,8 +18,6 @@ /* Precondition: 0 <= b < bits_in_dword */ -ARM_EABI_FNALIAS(llsr, lshrdi3) - COMPILER_RT_ABI di_int __lshrdi3(di_int a, si_int b) { @@ -41,3 +39,10 @@ __lshrdi3(di_int a, si_int b) } return result.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) { + return __lshrdi3(a, b); +} +#endif + diff --git a/lib/builtins/muldf3.c b/lib/builtins/muldf3.c index 1eb733849e5a..59a60190eba3 100644 --- a/lib/builtins/muldf3.c +++ b/lib/builtins/muldf3.c @@ -15,8 +15,13 @@ #define DOUBLE_PRECISION #include "fp_mul_impl.inc" -ARM_EABI_FNALIAS(dmul, muldf3) - COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) { + return __muldf3(a, b); +} +#endif + diff --git a/lib/builtins/muldi3.c b/lib/builtins/muldi3.c index 2dae44c11b95..6818a9e2f722 100644 --- a/lib/builtins/muldi3.c +++ b/lib/builtins/muldi3.c @@ -40,8 +40,6 @@ __muldsi3(su_int a, su_int b) /* Returns: a * b */ -ARM_EABI_FNALIAS(lmul, muldi3) - COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) { @@ -54,3 +52,10 @@ __muldi3(di_int a, di_int b) r.s.high += x.s.high * y.s.low + 
x.s.low * y.s.high; return r.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) { + return __muldi3(a, b); +} +#endif + diff --git a/lib/builtins/mulsf3.c b/lib/builtins/mulsf3.c index 478b3bc0e0e0..f141af1acc58 100644 --- a/lib/builtins/mulsf3.c +++ b/lib/builtins/mulsf3.c @@ -15,8 +15,13 @@ #define SINGLE_PRECISION #include "fp_mul_impl.inc" -ARM_EABI_FNALIAS(fmul, mulsf3) - COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) { + return __mulsf3(a, b); +} +#endif + diff --git a/lib/builtins/negdf2.c b/lib/builtins/negdf2.c index d634b421cb79..5e2544cdb4be 100644 --- a/lib/builtins/negdf2.c +++ b/lib/builtins/negdf2.c @@ -14,9 +14,14 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(dneg, negdf2) - COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_dneg(fp_t a) { + return __negdf2(a); +} +#endif + diff --git a/lib/builtins/negsf2.c b/lib/builtins/negsf2.c index 29c17be4145f..f90b34335680 100644 --- a/lib/builtins/negsf2.c +++ b/lib/builtins/negsf2.c @@ -14,9 +14,14 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(fneg, negsf2) - COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fneg(fp_t a) { + return __negsf2(a); +} +#endif + diff --git a/lib/builtins/subdf3.c b/lib/builtins/subdf3.c index 7a79e5e7765d..38340dfab1a6 100644 --- a/lib/builtins/subdf3.c +++ b/lib/builtins/subdf3.c @@ -15,11 +15,15 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(dsub, subdf3) - // Subtraction; flip the sign bit of b and add. 
COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) { return __adddf3(a, fromRep(toRep(b) ^ signBit)); } +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) { + return __subdf3(a, b); +} +#endif + diff --git a/lib/builtins/subsf3.c b/lib/builtins/subsf3.c index c3b85144af48..34276b1447ba 100644 --- a/lib/builtins/subsf3.c +++ b/lib/builtins/subsf3.c @@ -15,11 +15,15 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(fsub, subsf3) - // Subtraction; flip the sign bit of b and add. COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) { return __addsf3(a, fromRep(toRep(b) ^ signBit)); } +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) { + return __subsf3(a, b); +} +#endif + diff --git a/lib/builtins/truncdfhf2.c b/lib/builtins/truncdfhf2.c index 17195cd9e799..4bb71aa178a0 100644 --- a/lib/builtins/truncdfhf2.c +++ b/lib/builtins/truncdfhf2.c @@ -11,8 +11,13 @@ #define DST_HALF #include "fp_trunc_impl.inc" -ARM_EABI_FNALIAS(d2h, truncdfhf2) - COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { return __truncXfYf2__(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI uint16_t __aeabi_d2h(double a) { + return __truncdfhf2(a); +} +#endif + diff --git a/lib/builtins/truncdfsf2.c b/lib/builtins/truncdfsf2.c index 46ec11dccd79..8bf58bb23a3b 100644 --- a/lib/builtins/truncdfsf2.c +++ b/lib/builtins/truncdfsf2.c @@ -11,8 +11,13 @@ #define DST_SINGLE #include "fp_trunc_impl.inc" -ARM_EABI_FNALIAS(d2f, truncdfsf2) - COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_d2f(double a) { + return __truncdfsf2(a); +} +#endif + diff --git a/lib/builtins/truncsfhf2.c b/lib/builtins/truncsfhf2.c index 9d61895bfd88..f6ce1fa1de05 100644 --- a/lib/builtins/truncsfhf2.c +++ b/lib/builtins/truncsfhf2.c @@ -11,8 +11,6 @@ #define DST_HALF #include "fp_trunc_impl.inc" -ARM_EABI_FNALIAS(f2h, truncsfhf2) - // Use a forwarding definition and noinline to implement a poor man's 
alias, // as there isn't a good cross-platform way of defining one. COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { @@ -22,3 +20,10 @@ COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI uint16_t __aeabi_f2h(float a) { + return __truncsfhf2(a); +} +#endif + diff --git a/lib/builtins/udivsi3.c b/lib/builtins/udivsi3.c index 5d0140cc3e75..8eccf102cc97 100644 --- a/lib/builtins/udivsi3.c +++ b/lib/builtins/udivsi3.c @@ -18,8 +18,6 @@ /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ -ARM_EABI_FNALIAS(uidiv, udivsi3) - /* This function should not call __divsi3! */ COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) @@ -64,3 +62,10 @@ __udivsi3(su_int n, su_int d) q = (q << 1) | carry; return q; } + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) { + return __udivsi3(n, d); +} +#endif + diff --git a/lib/esan/esan_interceptors.cpp b/lib/esan/esan_interceptors.cpp index 9740f4dae8fa..62fa13c83822 100644 --- a/lib/esan/esan_interceptors.cpp +++ b/lib/esan/esan_interceptors.cpp @@ -31,6 +31,8 @@ using namespace __esan; // NOLINT // Get the per-platform defines for what is possible to intercept #include "sanitizer_common/sanitizer_platform_interceptors.h" +DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr) + // TODO(bruening): tsan disables several interceptors (getpwent, etc.) claiming // that interception is a perf hit: should we do the same? 
diff --git a/lib/lsan/lsan_allocator.h b/lib/lsan/lsan_allocator.h index fad5adb01a7f..5a0d94c71415 100644 --- a/lib/lsan/lsan_allocator.h +++ b/lib/lsan/lsan_allocator.h @@ -55,10 +55,18 @@ struct ChunkMetadata { static const uptr kRegionSizeLog = 20; static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog; typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap; -typedef CompactSizeClassMap SizeClassMap; -typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, - sizeof(ChunkMetadata), SizeClassMap, kRegionSizeLog, ByteMap> - PrimaryAllocator; + +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = sizeof(ChunkMetadata); + typedef __sanitizer::CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = __lsan::kRegionSizeLog; + typedef __lsan::ByteMap ByteMap; + typedef NoOpMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; +typedef SizeClassAllocator32 PrimaryAllocator; #elif defined(__x86_64__) || defined(__powerpc64__) struct AP64 { // Allocator64 parameters. Deliberately using a short name. static const uptr kSpaceBeg = 0x600000000000ULL; diff --git a/lib/lsan/lsan_common_linux.cc b/lib/lsan/lsan_common_linux.cc index fadd0263de73..c903be42d1e7 100644 --- a/lib/lsan/lsan_common_linux.cc +++ b/lib/lsan/lsan_common_linux.cc @@ -62,8 +62,10 @@ void InitializePlatformSpecificModules() { return; } } - VReport(1, "LeakSanitizer: Dynamic linker not found. " - "TLS will not be handled correctly.\n"); + if (linker == nullptr) { + VReport(1, "LeakSanitizer: Dynamic linker not found. 
" + "TLS will not be handled correctly.\n"); + } } static int ProcessGlobalRegionsCallback(struct dl_phdr_info *info, size_t size, diff --git a/lib/lsan/lsan_common_mac.cc b/lib/lsan/lsan_common_mac.cc index a9adcdfff37f..5ee1e228691a 100644 --- a/lib/lsan/lsan_common_mac.cc +++ b/lib/lsan/lsan_common_mac.cc @@ -144,6 +144,11 @@ void ProcessPlatformSpecificAllocations(Frontier *frontier) { if (info.user_tag == VM_MEMORY_OS_ALLOC_ONCE) { ScanRangeForPointers(address, end_address, frontier, "GLOBAL", kReachable); + + // Recursing over the full memory map is very slow, break out + // early if we don't need the full iteration. + if (!flags()->use_root_regions || !root_regions->size()) + break; } // This additional root region scan is required on Darwin in order to diff --git a/lib/msan/msan_allocator.cc b/lib/msan/msan_allocator.cc index 6c389f008cf7..1be573faa412 100644 --- a/lib/msan/msan_allocator.cc +++ b/lib/msan/msan_allocator.cc @@ -47,12 +47,18 @@ struct MsanMapUnmapCallback { static const uptr kRegionSizeLog = 20; static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog; typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap; - typedef CompactSizeClassMap SizeClassMap; - - typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, sizeof(Metadata), - SizeClassMap, kRegionSizeLog, ByteMap, - MsanMapUnmapCallback> PrimaryAllocator; + struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = sizeof(Metadata); + typedef __sanitizer::CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = __msan::kRegionSizeLog; + typedef __msan::ByteMap ByteMap; + typedef MsanMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; + }; + typedef SizeClassAllocator32 PrimaryAllocator; #elif defined(__x86_64__) #if SANITIZER_LINUX && !defined(MSAN_LINUX_X86_64_OLD_MAPPING) static const uptr kAllocatorSpace = 0x700000000000ULL; @@ -90,11 +96,18 @@ 
struct MsanMapUnmapCallback { static const uptr kRegionSizeLog = 20; static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog; typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap; - typedef CompactSizeClassMap SizeClassMap; - typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, sizeof(Metadata), - SizeClassMap, kRegionSizeLog, ByteMap, - MsanMapUnmapCallback> PrimaryAllocator; + struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = sizeof(Metadata); + typedef __sanitizer::CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = __msan::kRegionSizeLog; + typedef __msan::ByteMap ByteMap; + typedef MsanMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; + }; + typedef SizeClassAllocator32 PrimaryAllocator; #endif typedef SizeClassAllocatorLocalCache AllocatorCache; typedef LargeMmapAllocator SecondaryAllocator; diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc index 15543bd912d6..0f50693441be 100644 --- a/lib/msan/msan_interceptors.cc +++ b/lib/msan/msan_interceptors.cc @@ -341,33 +341,6 @@ INTERCEPTOR(char *, __strdup, char *src) { #define MSAN_MAYBE_INTERCEPT___STRDUP #endif -INTERCEPTOR(char *, strndup, char *src, SIZE_T n) { - ENSURE_MSAN_INITED(); - GET_STORE_STACK_TRACE; - // On FreeBSD strndup() leverages strnlen(). 
- InterceptorScope interceptor_scope; - SIZE_T copy_size = REAL(strnlen)(src, n); - char *res = REAL(strndup)(src, n); - CopyShadowAndOrigin(res, src, copy_size, &stack); - __msan_unpoison(res + copy_size, 1); // \0 - return res; -} - -#if !SANITIZER_FREEBSD -INTERCEPTOR(char *, __strndup, char *src, SIZE_T n) { - ENSURE_MSAN_INITED(); - GET_STORE_STACK_TRACE; - SIZE_T copy_size = REAL(strnlen)(src, n); - char *res = REAL(__strndup)(src, n); - CopyShadowAndOrigin(res, src, copy_size, &stack); - __msan_unpoison(res + copy_size, 1); // \0 - return res; -} -#define MSAN_MAYBE_INTERCEPT___STRNDUP INTERCEPT_FUNCTION(__strndup) -#else -#define MSAN_MAYBE_INTERCEPT___STRNDUP -#endif - INTERCEPTOR(char *, gcvt, double number, SIZE_T ndigit, char *buf) { ENSURE_MSAN_INITED(); char *res = REAL(gcvt)(number, ndigit, buf); @@ -1371,6 +1344,13 @@ int OnExit() { return __msan_memcpy(to, from, size); \ } +#define COMMON_INTERCEPTOR_COPY_STRING(ctx, to, from, size) \ + do { \ + GET_STORE_STACK_TRACE; \ + CopyShadowAndOrigin(to, from, size, &stack); \ + __msan_unpoison(to + size, 1); \ + } while (false) + #include "sanitizer_common/sanitizer_platform_interceptors.h" #include "sanitizer_common/sanitizer_common_interceptors.inc" @@ -1538,8 +1518,6 @@ void InitializeInterceptors() { INTERCEPT_FUNCTION(stpcpy); // NOLINT INTERCEPT_FUNCTION(strdup); MSAN_MAYBE_INTERCEPT___STRDUP; - INTERCEPT_FUNCTION(strndup); - MSAN_MAYBE_INTERCEPT___STRNDUP; INTERCEPT_FUNCTION(strncpy); // NOLINT INTERCEPT_FUNCTION(gcvt); INTERCEPT_FUNCTION(strcat); // NOLINT diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc index dd81c4d798f6..58f695e69e12 100644 --- a/lib/msan/tests/msan_test.cc +++ b/lib/msan/tests/msan_test.cc @@ -1581,7 +1581,8 @@ TEST(MemorySanitizer, strdup) { TEST(MemorySanitizer, strndup) { char buf[4] = "abc"; __msan_poison(buf + 2, sizeof(*buf)); - char *x = strndup(buf, 3); + char *x; + EXPECT_UMR(x = strndup(buf, 3)); EXPECT_NOT_POISONED(x[0]); 
EXPECT_NOT_POISONED(x[1]); EXPECT_POISONED(x[2]); @@ -1593,7 +1594,8 @@ TEST(MemorySanitizer, strndup_short) { char buf[4] = "abc"; __msan_poison(buf + 1, sizeof(*buf)); __msan_poison(buf + 2, sizeof(*buf)); - char *x = strndup(buf, 2); + char *x; + EXPECT_UMR(x = strndup(buf, 2)); EXPECT_NOT_POISONED(x[0]); EXPECT_POISONED(x[1]); EXPECT_NOT_POISONED(x[2]); @@ -2203,10 +2205,51 @@ TEST(MemorySanitizer, localtime_r) { EXPECT_NE(0U, strlen(time.tm_zone)); } +#if !defined(__FreeBSD__) +/* Creates a temporary file with contents similar to /etc/fstab to be used + with getmntent{_r}. */ +class TempFstabFile { + public: + TempFstabFile() : fd (-1) { } + ~TempFstabFile() { + if (fd >= 0) + close (fd); + } + + bool Create(void) { + snprintf(tmpfile, sizeof(tmpfile), "/tmp/msan.getmntent.tmp.XXXXXX"); + + fd = mkstemp(tmpfile); + if (fd == -1) + return false; + + const char entry[] = "/dev/root / ext4 errors=remount-ro 0 1"; + size_t entrylen = sizeof(entry); + + size_t bytesWritten = write(fd, entry, entrylen); + if (entrylen != bytesWritten) + return false; + + return true; + } + + const char* FileName(void) { + return tmpfile; + } + + private: + char tmpfile[128]; + int fd; +}; +#endif + // There's no getmntent() on FreeBSD. #if !defined(__FreeBSD__) TEST(MemorySanitizer, getmntent) { - FILE *fp = setmntent("/etc/fstab", "r"); + TempFstabFile fstabtmp; + ASSERT_TRUE(fstabtmp.Create()); + FILE *fp = setmntent(fstabtmp.FileName(), "r"); + struct mntent *mnt = getmntent(fp); ASSERT_TRUE(mnt != NULL); ASSERT_NE(0U, strlen(mnt->mnt_fsname)); @@ -2222,7 +2265,10 @@ TEST(MemorySanitizer, getmntent) { // There's no getmntent_r() on FreeBSD. 
#if !defined(__FreeBSD__) TEST(MemorySanitizer, getmntent_r) { - FILE *fp = setmntent("/etc/fstab", "r"); + TempFstabFile fstabtmp; + ASSERT_TRUE(fstabtmp.Create()); + FILE *fp = setmntent(fstabtmp.FileName(), "r"); + struct mntent mntbuf; char buf[1000]; struct mntent *mnt = getmntent_r(fp, &mntbuf, buf, sizeof(buf)); @@ -3678,8 +3724,10 @@ TEST(MemorySanitizer, ICmpRelational) { EXPECT_POISONED(poisoned(6, 0xF) > poisoned(7, 0)); EXPECT_POISONED(poisoned(0xF, 0xF) > poisoned(7, 0)); - - EXPECT_NOT_POISONED(poisoned(-1, 0x80000000U) >= poisoned(-1, 0U)); + // Note that "icmp op X, Y" is approximated with "or shadow(X), shadow(Y)" + // and therefore may generate false positives in some cases, e.g. the + // following one: + // EXPECT_NOT_POISONED(poisoned(-1, 0x80000000U) >= poisoned(-1, 0U)); } #if MSAN_HAS_M128 diff --git a/lib/sanitizer_common/sanitizer_allocator_internal.h b/lib/sanitizer_common/sanitizer_allocator_internal.h index e939cbe01c3c..d1890f20f810 100644 --- a/lib/sanitizer_common/sanitizer_allocator_internal.h +++ b/lib/sanitizer_common/sanitizer_allocator_internal.h @@ -23,21 +23,25 @@ namespace __sanitizer { // purposes. 
typedef CompactSizeClassMap InternalSizeClassMap; -static const uptr kInternalAllocatorSpace = 0; -static const u64 kInternalAllocatorSize = SANITIZER_MMAP_RANGE_SIZE; static const uptr kInternalAllocatorRegionSizeLog = 20; -#if SANITIZER_WORDSIZE == 32 static const uptr kInternalAllocatorNumRegions = - kInternalAllocatorSize >> kInternalAllocatorRegionSizeLog; + SANITIZER_MMAP_RANGE_SIZE >> kInternalAllocatorRegionSizeLog; +#if SANITIZER_WORDSIZE == 32 typedef FlatByteMap ByteMap; #else -static const uptr kInternalAllocatorNumRegions = - kInternalAllocatorSize >> kInternalAllocatorRegionSizeLog; typedef TwoLevelByteMap<(kInternalAllocatorNumRegions >> 12), 1 << 12> ByteMap; #endif -typedef SizeClassAllocator32< - kInternalAllocatorSpace, kInternalAllocatorSize, 0, InternalSizeClassMap, - kInternalAllocatorRegionSizeLog, ByteMap> PrimaryInternalAllocator; +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = 0; + typedef InternalSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = kInternalAllocatorRegionSizeLog; + typedef __sanitizer::ByteMap ByteMap; + typedef NoOpMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; +typedef SizeClassAllocator32 PrimaryInternalAllocator; typedef SizeClassAllocatorLocalCache InternalAllocatorCache; diff --git a/lib/sanitizer_common/sanitizer_allocator_primary32.h b/lib/sanitizer_common/sanitizer_allocator_primary32.h index 2882afd1fe1d..0f6f4f7f8503 100644 --- a/lib/sanitizer_common/sanitizer_allocator_primary32.h +++ b/lib/sanitizer_common/sanitizer_allocator_primary32.h @@ -36,13 +36,27 @@ template struct SizeClassAllocator32LocalCache; // // In order to avoid false sharing the objects of this class should be // chache-line aligned. -template + +struct SizeClassAllocator32FlagMasks { // Bit masks. 
+ enum { + kRandomShuffleChunks = 1, + }; +}; + +template class SizeClassAllocator32 { public: + static const uptr kSpaceBeg = Params::kSpaceBeg; + static const u64 kSpaceSize = Params::kSpaceSize; + static const uptr kMetadataSize = Params::kMetadataSize; + typedef typename Params::SizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = Params::kRegionSizeLog; + typedef typename Params::ByteMap ByteMap; + typedef typename Params::MapUnmapCallback MapUnmapCallback; + + static const bool kRandomShuffleChunks = + Params::kFlags & SizeClassAllocator32FlagMasks::kRandomShuffleChunks; + struct TransferBatch { static const uptr kMaxNumCached = SizeClassMap::kMaxNumCachedHint - 2; void SetFromArray(uptr region_beg_unused, void *batch[], uptr count) { @@ -86,8 +100,7 @@ class SizeClassAllocator32 { return SizeClassMap::Size(class_id); } - typedef SizeClassAllocator32 ThisT; + typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocator32LocalCache AllocatorCache; void Init(s32 release_to_os_interval_ms) { diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc index 53204b48e300..3c69726d7c91 100644 --- a/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -34,6 +34,8 @@ // COMMON_INTERCEPTOR_MEMSET_IMPL // COMMON_INTERCEPTOR_MEMMOVE_IMPL // COMMON_INTERCEPTOR_MEMCPY_IMPL +// COMMON_INTERCEPTOR_COPY_STRING +// COMMON_INTERCEPTOR_STRNDUP_IMPL //===----------------------------------------------------------------------===// #include "interception/interception.h" @@ -217,6 +219,25 @@ bool PlatformHasDifferentMemcpyAndMemmove(); } #endif +#ifndef COMMON_INTERCEPTOR_COPY_STRING +#define COMMON_INTERCEPTOR_COPY_STRING(ctx, to, from, size) {} +#endif + +#ifndef COMMON_INTERCEPTOR_STRNDUP_IMPL +#define COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size) \ + COMMON_INTERCEPTOR_ENTER(ctx, strndup, s, size); \ + uptr from_length = 
internal_strnlen(s, size); \ + uptr copy_length = Min(size, from_length); \ + char *new_mem = (char *)WRAP(malloc)(copy_length + 1); \ + if (common_flags()->intercept_strndup) { \ + COMMON_INTERCEPTOR_READ_RANGE(ctx, s, copy_length + 1); \ + } \ + COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length); \ + internal_memcpy(new_mem, s, copy_length); \ + new_mem[copy_length] = '\0'; \ + return new_mem; +#endif + struct FileMetadata { // For open_memstream(). char **addr; @@ -300,6 +321,26 @@ INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T maxlen) { #define INIT_STRNLEN #endif +#if SANITIZER_INTERCEPT_STRNDUP +INTERCEPTOR(char*, strndup, const char *s, uptr size) { + void *ctx; + COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size); +} +#define INIT_STRNDUP COMMON_INTERCEPT_FUNCTION(strndup) +#else +#define INIT_STRNDUP +#endif // SANITIZER_INTERCEPT_STRNDUP + +#if SANITIZER_INTERCEPT___STRNDUP +INTERCEPTOR(char*, __strndup, const char *s, uptr size) { + void *ctx; + COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size); +} +#define INIT___STRNDUP COMMON_INTERCEPT_FUNCTION(__strndup) +#else +#define INIT___STRNDUP +#endif // SANITIZER_INTERCEPT___STRNDUP + #if SANITIZER_INTERCEPT_TEXTDOMAIN INTERCEPTOR(char*, textdomain, const char *domainname) { void *ctx; @@ -6163,6 +6204,8 @@ static void InitializeCommonInterceptors() { INIT_TEXTDOMAIN; INIT_STRLEN; INIT_STRNLEN; + INIT_STRNDUP; + INIT___STRNDUP; INIT_STRCMP; INIT_STRNCMP; INIT_STRCASECMP; diff --git a/lib/sanitizer_common/sanitizer_flags.inc b/lib/sanitizer_common/sanitizer_flags.inc index 7a5fffcf6165..67a0a5810a28 100644 --- a/lib/sanitizer_common/sanitizer_flags.inc +++ b/lib/sanitizer_common/sanitizer_flags.inc @@ -195,6 +195,9 @@ COMMON_FLAG(bool, intercept_strpbrk, true, COMMON_FLAG(bool, intercept_strlen, true, "If set, uses custom wrappers for strlen and strnlen functions " "to find more errors.") +COMMON_FLAG(bool, intercept_strndup, true, + "If set, uses custom wrappers for strndup functions " + "to find more 
errors.") COMMON_FLAG(bool, intercept_strchr, true, "If set, uses custom wrappers for strchr, strchrnul, and strrchr " "functions to find more errors.") diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h index e5644ef25e83..a95497467d61 100644 --- a/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -25,6 +25,12 @@ # define SI_NOT_WINDOWS 0 #endif +#if SANITIZER_POSIX +# define SI_POSIX 1 +#else +# define SI_POSIX 0 +#endif + #if SANITIZER_LINUX && !SANITIZER_ANDROID # define SI_LINUX_NOT_ANDROID 1 #else @@ -69,6 +75,12 @@ # define SI_UNIX_NOT_MAC 0 #endif +#if SANITIZER_LINUX && !SANITIZER_FREEBSD +# define SI_LINUX_NOT_FREEBSD 1 +# else +# define SI_LINUX_NOT_FREEBSD 0 +#endif + #define SANITIZER_INTERCEPT_STRLEN 1 #define SANITIZER_INTERCEPT_STRNLEN SI_NOT_MAC #define SANITIZER_INTERCEPT_STRCMP 1 @@ -86,6 +98,8 @@ #define SANITIZER_INTERCEPT_MEMMOVE 1 #define SANITIZER_INTERCEPT_MEMCPY 1 #define SANITIZER_INTERCEPT_MEMCMP 1 +#define SANITIZER_INTERCEPT_STRNDUP SI_POSIX +#define SANITIZER_INTERCEPT___STRNDUP SI_LINUX_NOT_FREEBSD #if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1070 # define SI_MAC_DEPLOYMENT_BELOW_10_7 1 diff --git a/lib/sanitizer_common/sanitizer_procmaps.h b/lib/sanitizer_common/sanitizer_procmaps.h index 9dbb5ef0f4f6..5aad6b959ad4 100644 --- a/lib/sanitizer_common/sanitizer_procmaps.h +++ b/lib/sanitizer_common/sanitizer_procmaps.h @@ -70,6 +70,7 @@ class MemoryMappingLayout { bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, char filename[], uptr filename_size, ModuleArch *arch, u8 *uuid, uptr *protection); + void GetSegmentAddrRange(uptr *start, uptr *end, uptr vmaddr, uptr vmsize); int current_image_; u32 current_magic_; u32 current_filetype_; diff --git a/lib/sanitizer_common/sanitizer_procmaps_linux.cc 
b/lib/sanitizer_common/sanitizer_procmaps_linux.cc index fdf85b77a680..7e4a44be95b6 100644 --- a/lib/sanitizer_common/sanitizer_procmaps_linux.cc +++ b/lib/sanitizer_common/sanitizer_procmaps_linux.cc @@ -18,8 +18,8 @@ namespace __sanitizer { void ReadProcMaps(ProcSelfMapsBuff *proc_maps) { - CHECK(ReadFileToBuffer("/proc/self/maps", &proc_maps->data, - &proc_maps->mmaped_size, &proc_maps->len)); + ReadFileToBuffer("/proc/self/maps", &proc_maps->data, &proc_maps->mmaped_size, + &proc_maps->len); } static bool IsOneOf(char c, char c1, char c2) { diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc index be59b481f5a1..0b4171a90f60 100644 --- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc +++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc @@ -18,6 +18,7 @@ #include #include +#include // These are not available in older macOS SDKs. #ifndef CPU_SUBTYPE_X86_64_H @@ -71,6 +72,13 @@ void MemoryMappingLayout::Reset() { internal_memset(current_uuid_, 0, kModuleUUIDSize); } +// The dyld load address should be unchanged throughout process execution, +// and it is expensive to compute once many libraries have been loaded, +// so cache it here and do not reset. +static mach_header *dyld_hdr = 0; +static const char kDyldPath[] = "/usr/lib/dyld"; +static const int kDyldImageIdx = -1; + // static void MemoryMappingLayout::CacheMemoryMappings() { // No-op on Mac for now. @@ -95,14 +103,12 @@ bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset, const char *lc = current_load_cmd_addr_; current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize; if (((const load_command *)lc)->cmd == kLCSegment) { - const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_); const SegmentCommand* sc = (const SegmentCommand *)lc; - if (start) *start = sc->vmaddr + dlloff; + GetSegmentAddrRange(start, end, sc->vmaddr, sc->vmsize); if (protection) { // Return the initial protection. 
*protection = sc->initprot; } - if (end) *end = sc->vmaddr + sc->vmsize + dlloff; if (offset) { if (current_filetype_ == /*MH_EXECUTE*/ 0x2) { *offset = sc->vmaddr; @@ -111,8 +117,12 @@ bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset, } } if (filename) { - internal_strncpy(filename, _dyld_get_image_name(current_image_), - filename_size); + if (current_image_ == kDyldImageIdx) { + internal_strncpy(filename, kDyldPath, filename_size); + } else { + internal_strncpy(filename, _dyld_get_image_name(current_image_), + filename_size); + } } if (arch) { *arch = current_arch_; @@ -180,11 +190,74 @@ static bool IsModuleInstrumented(const load_command *first_lc) { return false; } +// _dyld_get_image_header() and related APIs don't report dyld itself. +// We work around this by manually recursing through the memory map +// until we hit a Mach header matching dyld instead. These recurse +// calls are expensive, but the first memory map generation occurs +// early in the process, when dyld is one of the only images loaded, +// so it will be hit after only a few iterations. 
+static mach_header *get_dyld_image_header() { + mach_port_name_t port; + if (task_for_pid(mach_task_self(), internal_getpid(), &port) != + KERN_SUCCESS) { + return nullptr; + } + + unsigned depth = 1; + vm_size_t size = 0; + vm_address_t address = 0; + kern_return_t err = KERN_SUCCESS; + mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64; + + while (true) { + struct vm_region_submap_info_64 info; + err = vm_region_recurse_64(port, &address, &size, &depth, + (vm_region_info_t)&info, &count); + if (err != KERN_SUCCESS) return nullptr; + + if (size >= sizeof(mach_header) && + info.protection & MemoryMappingLayout::kProtectionRead) { + mach_header *hdr = (mach_header *)address; + if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) && + hdr->filetype == MH_DYLINKER) { + return hdr; + } + } + address += size; + } +} + +const mach_header *get_dyld_hdr() { + if (!dyld_hdr) dyld_hdr = get_dyld_image_header(); + + return dyld_hdr; +} + +void MemoryMappingLayout::GetSegmentAddrRange(uptr *start, uptr *end, + uptr vmaddr, uptr vmsize) { + if (current_image_ == kDyldImageIdx) { + // vmaddr is masked with 0xfffff because on macOS versions < 10.12, + // it contains an absolute address rather than an offset for dyld. + // To make matters even more complicated, this absolute address + // isn't actually the absolute segment address, but the offset portion + // of the address is accurate when combined with the dyld base address, + // and the mask will give just this offset. 
+ if (start) *start = (vmaddr & 0xfffff) + (uptr)get_dyld_hdr(); + if (end) *end = (vmaddr & 0xfffff) + vmsize + (uptr)get_dyld_hdr(); + } else { + const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_); + if (start) *start = vmaddr + dlloff; + if (end) *end = vmaddr + vmsize + dlloff; + } +} + bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset, char filename[], uptr filename_size, uptr *protection, ModuleArch *arch, u8 *uuid) { - for (; current_image_ >= 0; current_image_--) { - const mach_header* hdr = _dyld_get_image_header(current_image_); + for (; current_image_ >= kDyldImageIdx; current_image_--) { + const mach_header *hdr = (current_image_ == kDyldImageIdx) + ? get_dyld_hdr() + : _dyld_get_image_header(current_image_); if (!hdr) continue; if (current_load_cmd_count_ < 0) { // Set up for this image; diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc index 20b8760935bd..0c27c472f02e 100644 --- a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc +++ b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc @@ -170,6 +170,10 @@ PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( internal_memcpy(buffer, ®s, sizeof(regs)); *sp = regs.SP_REG; + // On x86_64 and aarch64, we must account for the stack redzone, which is 128 + // bytes. 
+ if (SANITIZER_WORDSIZE == 64) *sp -= 128; + return REGISTERS_AVAILABLE; } diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc index e14517fca518..b28159a2adaf 100644 --- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc +++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc @@ -108,13 +108,17 @@ static const u64 kAddressSpaceSize = 1ULL << 32; static const uptr kRegionSizeLog = FIRST_32_SECOND_64(20, 24); static const uptr kFlatByteMapSize = kAddressSpaceSize >> kRegionSizeLog; -typedef SizeClassAllocator32< - 0, kAddressSpaceSize, - /*kMetadataSize*/16, - CompactSizeClassMap, - kRegionSizeLog, - FlatByteMap > - Allocator32Compact; +struct AP32Compact { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = kAddressSpaceSize; + static const uptr kMetadataSize = 16; + typedef CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = ::kRegionSizeLog; + typedef FlatByteMap ByteMap; + typedef NoOpMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; +typedef SizeClassAllocator32 Allocator32Compact; template void TestSizeClassMap() { @@ -386,17 +390,21 @@ TEST(SanitizerCommon, SizeClassAllocator64MapUnmapCallback) { #endif #endif +struct AP32WithCallback { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = kAddressSpaceSize; + static const uptr kMetadataSize = 16; + typedef CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = ::kRegionSizeLog; + typedef FlatByteMap ByteMap; + typedef TestMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; + TEST(SanitizerCommon, SizeClassAllocator32MapUnmapCallback) { TestMapUnmapCallback::map_count = 0; TestMapUnmapCallback::unmap_count = 0; - typedef SizeClassAllocator32< - 0, kAddressSpaceSize, - /*kMetadataSize*/16, - CompactSizeClassMap, - kRegionSizeLog, - FlatByteMap, - TestMapUnmapCallback> - Allocator32WithCallBack; + typedef SizeClassAllocator32 
Allocator32WithCallBack; Allocator32WithCallBack *a = new Allocator32WithCallBack; a->Init(kReleaseToOSIntervalNever); EXPECT_EQ(TestMapUnmapCallback::map_count, 0); diff --git a/lib/sanitizer_common/tests/sanitizer_test_utils.h b/lib/sanitizer_common/tests/sanitizer_test_utils.h index 9c162a66f547..b7728d9ea25e 100644 --- a/lib/sanitizer_common/tests/sanitizer_test_utils.h +++ b/lib/sanitizer_common/tests/sanitizer_test_utils.h @@ -124,4 +124,10 @@ static inline uint32_t my_rand() { # define SANITIZER_TEST_HAS_PRINTF_L 0 #endif +#if !defined(_MSC_VER) +# define SANITIZER_TEST_HAS_STRNDUP 1 +#else +# define SANITIZER_TEST_HAS_STRNDUP 0 +#endif + #endif // SANITIZER_TEST_UTILS_H diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp index 5420fc9649ca..ce69ddf55531 100644 --- a/lib/scudo/scudo_allocator.cpp +++ b/lib/scudo/scudo_allocator.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "scudo_allocator.h" +#include "scudo_crc32.h" #include "scudo_tls.h" #include "scudo_utils.h" @@ -34,21 +35,28 @@ static uptr Cookie; // at compilation or at runtime. static atomic_uint8_t HashAlgorithm = { CRC32Software }; -SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data); - -INLINE u32 computeCRC32(u32 Crc, uptr Data, u8 HashType) { - // If SSE4.2 is defined here, it was enabled everywhere, as opposed to only - // for scudo_crc32.cpp. This means that other SSE instructions were likely - // emitted at other places, and as a result there is no reason to not use - // the hardware version of the CRC32. +INLINE u32 computeCRC32(uptr Crc, uptr Value, uptr *Array, uptr ArraySize) { + // If the hardware CRC32 feature is defined here, it was enabled everywhere, + // as opposed to only for scudo_crc32.cpp. This means that other hardware + // specific instructions were likely emitted at other places, and as a + // result there is no reason to not use it here. 
#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) - return computeHardwareCRC32(Crc, Data); + Crc = CRC32_INTRINSIC(Crc, Value); + for (uptr i = 0; i < ArraySize; i++) + Crc = CRC32_INTRINSIC(Crc, Array[i]); + return Crc; #else - if (computeHardwareCRC32 && HashType == CRC32Hardware) - return computeHardwareCRC32(Crc, Data); - else - return computeSoftwareCRC32(Crc, Data); -#endif // defined(__SSE4_2__) + if (atomic_load_relaxed(&HashAlgorithm) == CRC32Hardware) { + Crc = computeHardwareCRC32(Crc, Value); + for (uptr i = 0; i < ArraySize; i++) + Crc = computeHardwareCRC32(Crc, Array[i]); + return Crc; + } + Crc = computeSoftwareCRC32(Crc, Value); + for (uptr i = 0; i < ArraySize; i++) + Crc = computeSoftwareCRC32(Crc, Array[i]); + return Crc; +#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } static ScudoBackendAllocator &getBackendAllocator(); @@ -65,8 +73,9 @@ struct ScudoChunk : UnpackedHeader { // Returns the usable size for a chunk, meaning the amount of bytes from the // beginning of the user data to the end of the backend allocated chunk. 
uptr getUsableSize(UnpackedHeader *Header) { - uptr Size = getBackendAllocator().GetActuallyAllocatedSize( - getAllocBeg(Header)); + uptr Size = + getBackendAllocator().GetActuallyAllocatedSize(getAllocBeg(Header), + Header->FromPrimary); if (Size == 0) return 0; return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog); @@ -78,10 +87,8 @@ struct ScudoChunk : UnpackedHeader { ZeroChecksumHeader.Checksum = 0; uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)]; memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder)); - u8 HashType = atomic_load_relaxed(&HashAlgorithm); - u32 Crc = computeCRC32(Cookie, reinterpret_cast(this), HashType); - for (uptr i = 0; i < ARRAY_SIZE(HeaderHolder); i++) - Crc = computeCRC32(Crc, HeaderHolder[i], HashType); + u32 Crc = computeCRC32(Cookie, reinterpret_cast(this), HeaderHolder, + ARRAY_SIZE(HeaderHolder)); return static_cast(Crc); } @@ -195,10 +202,10 @@ void initScudo() { CHECK(!ScudoInitIsRunning && "Scudo init calls itself!"); ScudoInitIsRunning = true; - // Check is SSE4.2 is supported, if so, opt for the CRC32 hardware version. - if (testCPUFeature(CRC32CPUFeature)) { + // Check if hardware CRC32 is supported in the binary and by the platform, if + // so, opt for the CRC32 hardware version of the checksum. + if (computeHardwareCRC32 && testCPUFeature(CRC32CPUFeature)) atomic_store_relaxed(&HashAlgorithm, CRC32Hardware); - } initFlags(); @@ -215,7 +222,8 @@ struct QuarantineCallback { explicit QuarantineCallback(AllocatorCache *Cache) : Cache_(Cache) {} - // Chunk recycling function, returns a quarantined chunk to the backend. + // Chunk recycling function, returns a quarantined chunk to the backend, + // first making sure it hasn't been tampered with. 
void Recycle(ScudoChunk *Chunk) { UnpackedHeader Header; Chunk->loadHeader(&Header); @@ -225,17 +233,19 @@ struct QuarantineCallback { } Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(&Header); - getBackendAllocator().Deallocate(Cache_, Ptr); + getBackendAllocator().Deallocate(Cache_, Ptr, Header.FromPrimary); } - /// Internal quarantine allocation and deallocation functions. + // Internal quarantine allocation and deallocation functions. We first check + // that the batches are indeed serviced by the Primary. + // TODO(kostyak): figure out the best way to protect the batches. + COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize); void *Allocate(uptr Size) { - // TODO(kostyak): figure out the best way to protect the batches. - return getBackendAllocator().Allocate(Cache_, Size, MinAlignment); + return getBackendAllocator().Allocate(Cache_, Size, MinAlignment, true); } void Deallocate(void *Ptr) { - getBackendAllocator().Deallocate(Cache_, Ptr); + getBackendAllocator().Deallocate(Cache_, Ptr, true); } AllocatorCache *Cache_; @@ -353,58 +363,55 @@ struct ScudoAllocator { Size = 1; uptr NeededSize = RoundUpTo(Size, MinAlignment) + AlignedChunkHeaderSize; - if (Alignment > MinAlignment) - NeededSize += Alignment; - if (NeededSize >= MaxAllowedMallocSize) + uptr AlignedSize = (Alignment > MinAlignment) ? + NeededSize + (Alignment - AlignedChunkHeaderSize) : NeededSize; + if (AlignedSize >= MaxAllowedMallocSize) return BackendAllocator.ReturnNullOrDieOnBadRequest(); - // Primary backed and Secondary backed allocations have a different - // treatment. We deal with alignment requirements of Primary serviced - // allocations here, but the Secondary will take care of its own alignment - // needs, which means we also have to work around some limitations of the - // combined allocator to accommodate the situation. 
- bool FromPrimary = PrimaryAllocator::CanAllocate(NeededSize, MinAlignment); + // Primary and Secondary backed allocations have a different treatment. We + // deal with alignment requirements of Primary serviced allocations here, + // but the Secondary will take care of its own alignment needs. + bool FromPrimary = PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment); void *Ptr; uptr Salt; + uptr AllocationSize = FromPrimary ? AlignedSize : NeededSize; uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment; ScudoThreadContext *ThreadContext = getThreadContextAndLock(); if (LIKELY(ThreadContext)) { Salt = getPrng(ThreadContext)->getNext(); Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext), - NeededSize, AllocationAlignment); + AllocationSize, AllocationAlignment, + FromPrimary); ThreadContext->unlock(); } else { SpinMutexLock l(&FallbackMutex); Salt = FallbackPrng.getNext(); - Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, NeededSize, - AllocationAlignment); + Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, AllocationSize, + AllocationAlignment, FromPrimary); } if (!Ptr) return BackendAllocator.ReturnNullOrDieOnOOM(); - uptr AllocBeg = reinterpret_cast(Ptr); - // If the allocation was serviced by the secondary, the returned pointer - // accounts for ChunkHeaderSize to pass the alignment check of the combined - // allocator. Adjust it here. - if (!FromPrimary) { - AllocBeg -= AlignedChunkHeaderSize; - if (Alignment > MinAlignment) - NeededSize -= Alignment; - } - // If requested, we will zero out the entire contents of the returned chunk. 
if ((ForceZeroContents || ZeroContents) && FromPrimary) - memset(Ptr, 0, BackendAllocator.GetActuallyAllocatedSize(Ptr)); + memset(Ptr, 0, + BackendAllocator.GetActuallyAllocatedSize(Ptr, FromPrimary)); + UnpackedHeader Header = {}; + uptr AllocBeg = reinterpret_cast(Ptr); uptr UserBeg = AllocBeg + AlignedChunkHeaderSize; - if (!IsAligned(UserBeg, Alignment)) + if (!IsAligned(UserBeg, Alignment)) { + // Since the Secondary takes care of alignment, a non-aligned pointer + // means it is from the Primary. It is also the only case where the offset + // field of the header would be non-zero. + CHECK(FromPrimary); UserBeg = RoundUpTo(UserBeg, Alignment); - CHECK_LE(UserBeg + Size, AllocBeg + NeededSize); - UnpackedHeader Header = {}; + uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg; + Header.Offset = Offset >> MinAlignmentLog; + } + CHECK_LE(UserBeg + Size, AllocBeg + AllocationSize); Header.State = ChunkAllocated; - uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg; - Header.Offset = Offset >> MinAlignmentLog; Header.AllocType = Type; if (FromPrimary) { Header.FromPrimary = FromPrimary; @@ -431,17 +438,20 @@ struct ScudoAllocator { // with no additional security value. 
void quarantineOrDeallocateChunk(ScudoChunk *Chunk, UnpackedHeader *Header, uptr Size) { + bool FromPrimary = Header->FromPrimary; bool BypassQuarantine = (AllocatorQuarantine.GetCacheSize() == 0); if (BypassQuarantine) { Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(Header); ScudoThreadContext *ThreadContext = getThreadContextAndLock(); if (LIKELY(ThreadContext)) { - getBackendAllocator().Deallocate(getAllocatorCache(ThreadContext), Ptr); + getBackendAllocator().Deallocate(getAllocatorCache(ThreadContext), Ptr, + FromPrimary); ThreadContext->unlock(); } else { SpinMutexLock Lock(&FallbackMutex); - getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr); + getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr, + FromPrimary); } } else { UnpackedHeader NewHeader = *Header; diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h index f159deffb1d5..523808750eec 100644 --- a/lib/scudo/scudo_allocator.h +++ b/lib/scudo/scudo_allocator.h @@ -80,7 +80,7 @@ const uptr AllocatorSize = 0x10000000000ULL; // 1T. const uptr AllocatorSize = 0x40000000000ULL; // 4T. # endif typedef DefaultSizeClassMap SizeClassMap; -struct AP { +struct AP64 { static const uptr kSpaceBeg = AllocatorSpace; static const uptr kSpaceSize = AllocatorSize; static const uptr kMetadataSize = 0; @@ -89,7 +89,7 @@ struct AP { static const uptr kFlags = SizeClassAllocator64FlagMasks::kRandomShuffleChunks; }; -typedef SizeClassAllocator64 PrimaryAllocator; +typedef SizeClassAllocator64 PrimaryAllocator; #else // Currently, the 32-bit Sanitizer allocator has not yet benefited from all the // security improvements brought to the 64-bit one. 
This makes the 32-bit @@ -102,16 +102,27 @@ typedef FlatByteMap ByteMap; typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap; # endif // SANITIZER_WORDSIZE typedef DefaultSizeClassMap SizeClassMap; -typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap, - RegionSizeLog, ByteMap> PrimaryAllocator; +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = 0; + typedef __scudo::SizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = RegionSizeLog; + typedef __scudo::ByteMap ByteMap; + typedef NoOpMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = + SizeClassAllocator32FlagMasks::kRandomShuffleChunks; +}; +typedef SizeClassAllocator32 PrimaryAllocator; #endif // SANITIZER_CAN_USE_ALLOCATOR64 #include "scudo_allocator_secondary.h" +#include "scudo_allocator_combined.h" typedef SizeClassAllocatorLocalCache AllocatorCache; typedef ScudoLargeMmapAllocator SecondaryAllocator; -typedef CombinedAllocator - ScudoBackendAllocator; +typedef ScudoCombinedAllocator ScudoBackendAllocator; void initScudo(); diff --git a/lib/scudo/scudo_allocator_combined.h b/lib/scudo/scudo_allocator_combined.h new file mode 100644 index 000000000000..c978db55a9d9 --- /dev/null +++ b/lib/scudo/scudo_allocator_combined.h @@ -0,0 +1,84 @@ +//===-- scudo_allocator_combined.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// Scudo Combined Allocator, dispatches allocation & deallocation requests to +/// the Primary or the Secondary backend allocators. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_ALLOCATOR_COMBINED_H_ +#define SCUDO_ALLOCATOR_COMBINED_H_ + +#ifndef SCUDO_ALLOCATOR_H_ +#error "This file must be included inside scudo_allocator.h." +#endif + +template +class ScudoCombinedAllocator { + public: + void Init(bool AllocatorMayReturnNull, s32 ReleaseToOSIntervalMs) { + Primary.Init(ReleaseToOSIntervalMs); + Secondary.Init(AllocatorMayReturnNull); + Stats.Init(); + atomic_store_relaxed(&MayReturnNull, AllocatorMayReturnNull); + } + + void *Allocate(AllocatorCache *Cache, uptr Size, uptr Alignment, + bool FromPrimary) { + if (FromPrimary) + return Cache->Allocate(&Primary, Primary.ClassID(Size)); + return Secondary.Allocate(&Stats, Size, Alignment); + } + + void *ReturnNullOrDieOnBadRequest() { + if (atomic_load_relaxed(&MayReturnNull)) + return nullptr; + ReportAllocatorCannotReturnNull(false); + } + + void *ReturnNullOrDieOnOOM() { + if (atomic_load_relaxed(&MayReturnNull)) + return nullptr; + ReportAllocatorCannotReturnNull(true); + } + + void Deallocate(AllocatorCache *Cache, void *Ptr, bool FromPrimary) { + if (FromPrimary) + Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr); + else + Secondary.Deallocate(&Stats, Ptr); + } + + uptr GetActuallyAllocatedSize(void *Ptr, bool FromPrimary) { + if (FromPrimary) + return Primary.GetActuallyAllocatedSize(Ptr); + return Secondary.GetActuallyAllocatedSize(Ptr); + } + + void InitCache(AllocatorCache *Cache) { + Cache->Init(&Stats); + } + + void DestroyCache(AllocatorCache *Cache) { + Cache->Destroy(&Primary, &Stats); + } + + void GetStats(AllocatorStatCounters StatType) const { + Stats.Get(StatType); + } + + private: + PrimaryAllocator Primary; + SecondaryAllocator Secondary; + AllocatorGlobalStats Stats; + atomic_uint8_t MayReturnNull; +}; + +#endif // SCUDO_ALLOCATOR_COMBINED_H_ diff --git a/lib/scudo/scudo_allocator_secondary.h b/lib/scudo/scudo_allocator_secondary.h index 
fbc7f247d708..2950909b547e 100644 --- a/lib/scudo/scudo_allocator_secondary.h +++ b/lib/scudo/scudo_allocator_secondary.h @@ -26,20 +26,19 @@ class ScudoLargeMmapAllocator { void Init(bool AllocatorMayReturnNull) { PageSize = GetPageSizeCached(); - atomic_store(&MayReturnNull, AllocatorMayReturnNull, memory_order_relaxed); + atomic_store_relaxed(&MayReturnNull, AllocatorMayReturnNull); } void *Allocate(AllocatorStats *Stats, uptr Size, uptr Alignment) { + uptr UserSize = Size - AlignedChunkHeaderSize; // The Scudo frontend prevents us from allocating more than // MaxAllowedMallocSize, so integer overflow checks would be superfluous. uptr MapSize = Size + SecondaryHeaderSize; + if (Alignment > MinAlignment) + MapSize += Alignment; MapSize = RoundUpTo(MapSize, PageSize); // Account for 2 guard pages, one before and one after the chunk. MapSize += 2 * PageSize; - // The size passed to the Secondary comprises the alignment, if large - // enough. Subtract it here to get the requested size, including header. - if (Alignment > MinAlignment) - Size -= Alignment; uptr MapBeg = reinterpret_cast(MmapNoAccess(MapSize)); if (MapBeg == ~static_cast(0)) @@ -51,32 +50,32 @@ class ScudoLargeMmapAllocator { // initial guard page, and both headers. This is the pointer that has to // abide by alignment requirements. uptr UserBeg = MapBeg + PageSize + HeadersSize; + uptr UserEnd = UserBeg + UserSize; // In the rare event of larger alignments, we will attempt to fit the mmap // area better and unmap extraneous memory. This will also ensure that the // offset and unused bytes field of the header stay small. 
if (Alignment > MinAlignment) { - if (UserBeg & (Alignment - 1)) - UserBeg += Alignment - (UserBeg & (Alignment - 1)); - CHECK_GE(UserBeg, MapBeg); - uptr NewMapBeg = RoundDownTo(UserBeg - HeadersSize, PageSize) - PageSize; - CHECK_GE(NewMapBeg, MapBeg); - uptr NewMapEnd = RoundUpTo(UserBeg + (Size - AlignedChunkHeaderSize), - PageSize) + PageSize; - CHECK_LE(NewMapEnd, MapEnd); - // Unmap the extra memory if it's large enough, on both sides. - uptr Diff = NewMapBeg - MapBeg; - if (Diff > PageSize) - UnmapOrDie(reinterpret_cast(MapBeg), Diff); - Diff = MapEnd - NewMapEnd; - if (Diff > PageSize) - UnmapOrDie(reinterpret_cast(NewMapEnd), Diff); - MapBeg = NewMapBeg; - MapEnd = NewMapEnd; - MapSize = NewMapEnd - NewMapBeg; + if (!IsAligned(UserBeg, Alignment)) { + UserBeg = RoundUpTo(UserBeg, Alignment); + CHECK_GE(UserBeg, MapBeg); + uptr NewMapBeg = RoundDownTo(UserBeg - HeadersSize, PageSize) - + PageSize; + CHECK_GE(NewMapBeg, MapBeg); + if (NewMapBeg != MapBeg) { + UnmapOrDie(reinterpret_cast(MapBeg), NewMapBeg - MapBeg); + MapBeg = NewMapBeg; + } + UserEnd = UserBeg + UserSize; + } + uptr NewMapEnd = RoundUpTo(UserEnd, PageSize) + PageSize; + if (NewMapEnd != MapEnd) { + UnmapOrDie(reinterpret_cast(NewMapEnd), MapEnd - NewMapEnd); + MapEnd = NewMapEnd; + } + MapSize = MapEnd - MapBeg; } - uptr UserEnd = UserBeg + (Size - AlignedChunkHeaderSize); CHECK_LE(UserEnd, MapEnd - PageSize); // Actually mmap the memory, preserving the guard pages on either side. 
CHECK_EQ(MapBeg + PageSize, reinterpret_cast( @@ -94,25 +93,15 @@ class ScudoLargeMmapAllocator { Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize); } - return reinterpret_cast(UserBeg); - } - - void *ReturnNullOrDieOnBadRequest() { - if (atomic_load(&MayReturnNull, memory_order_acquire)) - return nullptr; - ReportAllocatorCannotReturnNull(false); + return reinterpret_cast(Ptr); } void *ReturnNullOrDieOnOOM() { - if (atomic_load(&MayReturnNull, memory_order_acquire)) + if (atomic_load_relaxed(&MayReturnNull)) return nullptr; ReportAllocatorCannotReturnNull(true); } - void SetMayReturnNull(bool AllocatorMayReturnNull) { - atomic_store(&MayReturnNull, AllocatorMayReturnNull, memory_order_release); - } - void Deallocate(AllocatorStats *Stats, void *Ptr) { SecondaryHeader *Header = getHeader(Ptr); { @@ -123,14 +112,6 @@ class ScudoLargeMmapAllocator { UnmapOrDie(reinterpret_cast(Header->MapBeg), Header->MapSize); } - uptr TotalMemoryUsed() { - UNIMPLEMENTED(); - } - - bool PointerIsMine(const void *Ptr) { - UNIMPLEMENTED(); - } - uptr GetActuallyAllocatedSize(void *Ptr) { SecondaryHeader *Header = getHeader(Ptr); // Deduct PageSize as MapSize includes the trailing guard page. @@ -138,39 +119,9 @@ class ScudoLargeMmapAllocator { return MapEnd - reinterpret_cast(Ptr); } - void *GetMetaData(const void *Ptr) { - UNIMPLEMENTED(); - } - - void *GetBlockBegin(const void *Ptr) { - UNIMPLEMENTED(); - } - - void *GetBlockBeginFastLocked(void *Ptr) { - UNIMPLEMENTED(); - } - - void PrintStats() { - UNIMPLEMENTED(); - } - - void ForceLock() { - UNIMPLEMENTED(); - } - - void ForceUnlock() { - UNIMPLEMENTED(); - } - - void ForEachChunk(ForEachChunkCallback Callback, void *Arg) { - UNIMPLEMENTED(); - } - private: // A Secondary allocated chunk header contains the base of the mapping and - // its size. Currently, the base is always a page before the header, but - // we might want to extend that number in the future based on the size of - // the allocation. 
+ // its size, which comprises the guard pages. struct SecondaryHeader { uptr MapBeg; uptr MapSize; diff --git a/lib/scudo/scudo_crc32.cpp b/lib/scudo/scudo_crc32.cpp index 56be22f4ee62..a267dc4e3fb8 100644 --- a/lib/scudo/scudo_crc32.cpp +++ b/lib/scudo/scudo_crc32.cpp @@ -12,24 +12,7 @@ /// //===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_internal_defs.h" - -// Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 -// - for ARM & AArch64: -march=armv8-a+crc or -mcrc -// An additional check must be performed at runtime as well to make sure the -// emitted instructions are valid on the target host. - -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) -# ifdef __SSE4_2__ -# include -# define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) -# endif -# ifdef __ARM_FEATURE_CRC32 -# include -# define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) -# endif -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#include "scudo_crc32.h" namespace __scudo { diff --git a/lib/scudo/scudo_crc32.h b/lib/scudo/scudo_crc32.h new file mode 100644 index 000000000000..5ffcc62658cc --- /dev/null +++ b/lib/scudo/scudo_crc32.h @@ -0,0 +1,101 @@ +//===-- scudo_crc32.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// Scudo chunk header checksum related definitions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CRC32_H_ +#define SCUDO_CRC32_H_ + +#include "sanitizer_common/sanitizer_internal_defs.h" + +// Hardware CRC32 is supported at compilation via the following: +// - for i386 & x86_64: -msse4.2 +// - for ARM & AArch64: -march=armv8-a+crc or -mcrc +// An additional check must be performed at runtime as well to make sure the +// emitted instructions are valid on the target host. + +#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +# ifdef __SSE4_2__ +# include +# define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) +# endif +# ifdef __ARM_FEATURE_CRC32 +# include +# define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) +# endif +#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) + +namespace __scudo { + +enum : u8 { + CRC32Software = 0, + CRC32Hardware = 1, +}; + +const static u32 CRC32Table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) { + for (uptr i = 0; i < sizeof(Data); i++) { + Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); + Data >>= 8; + } + return Crc; +} + +SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data); + +} // namespace __scudo + +#endif // SCUDO_CRC32_H_ diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h index 484b0c859e3d..7198476f42cf 100644 --- a/lib/scudo/scudo_utils.h +++ b/lib/scudo/scudo_utils.h @@ -53,65 +53,6 @@ struct Xorshift128Plus { u64 State[2]; }; -enum : u8 { - CRC32Software = 0, - CRC32Hardware = 1, -}; - -const static u32 CRC32Table[] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, - 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, - 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, - 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, - 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, - 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, - 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 
0x4adfa541, 0x3dd895d7, - 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, - 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, - 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, - 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, - 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, - 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, - 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, - 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, - 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, - 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, - 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) { - for (uptr i = 
0; i < sizeof(Data); i++) { - Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); - Data >>= 8; - } - return Crc; -} - } // namespace __scudo #endif // SCUDO_UTILS_H_ diff --git a/lib/tsan/check_analyze.sh b/lib/tsan/check_analyze.sh index d454ec2dd0fd..22eb444198a4 100755 --- a/lib/tsan/check_analyze.sh +++ b/lib/tsan/check_analyze.sh @@ -29,7 +29,7 @@ check() { for f in write1 write2 write4 write8; do check $f rsp 1 check $f push 2 - check $f pop 2 + check $f pop 12 done for f in read1 read2 read4 read8; do diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h index e92a0f35705e..2cf2e168454d 100644 --- a/lib/tsan/rtl/tsan_rtl.h +++ b/lib/tsan/rtl/tsan_rtl.h @@ -55,16 +55,22 @@ namespace __tsan { #if !SANITIZER_GO struct MapUnmapCallback; #if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) -static const uptr kAllocatorSpace = 0; -static const uptr kAllocatorSize = SANITIZER_MMAP_RANGE_SIZE; static const uptr kAllocatorRegionSizeLog = 20; static const uptr kAllocatorNumRegions = - kAllocatorSize >> kAllocatorRegionSizeLog; + SANITIZER_MMAP_RANGE_SIZE >> kAllocatorRegionSizeLog; typedef TwoLevelByteMap<(kAllocatorNumRegions >> 12), 1 << 12, MapUnmapCallback> ByteMap; -typedef SizeClassAllocator32 PrimaryAllocator; +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = 0; + typedef __sanitizer::CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = kAllocatorRegionSizeLog; + typedef __tsan::ByteMap ByteMap; + typedef __tsan::MapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; +typedef SizeClassAllocator32 PrimaryAllocator; #else struct AP64 { // Allocator64 parameters. Deliberately using a short name. 
static const uptr kSpaceBeg = Mapping::kHeapMemBeg; diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc index 8d1c7c5d807f..f26e77dd7fc1 100644 --- a/lib/xray/xray_AArch64.cc +++ b/lib/xray/xray_AArch64.cc @@ -18,8 +18,7 @@ #include #include - -extern "C" void __clear_cache(void* start, void* end); +extern "C" void __clear_cache(void *start, void *end); namespace __xray { @@ -86,8 +85,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); } - __clear_cache(reinterpret_cast(FirstAddress), - reinterpret_cast(CurAddress)); + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); return true; } @@ -107,6 +106,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64? + return false; +} + // FIXME: Maybe implement this better? 
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc index 26d673ec23a0..da4efcdd2b17 100644 --- a/lib/xray/xray_arm.cc +++ b/lib/xray/xray_arm.cc @@ -18,7 +18,7 @@ #include #include -extern "C" void __clear_cache(void* start, void* end); +extern "C" void __clear_cache(void *start, void *end); namespace __xray { @@ -122,8 +122,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); } - __clear_cache(reinterpret_cast(FirstAddress), - reinterpret_cast(CurAddress)); + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); return true; } @@ -143,6 +143,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm? + return false; +} + // FIXME: Maybe implement this better? bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } diff --git a/lib/xray/xray_fdr_log_records.h b/lib/xray/xray_fdr_log_records.h index 36d9410d16f6..3d6d38892c76 100644 --- a/lib/xray/xray_fdr_log_records.h +++ b/lib/xray/xray_fdr_log_records.h @@ -29,6 +29,7 @@ struct alignas(16) MetadataRecord { NewCPUId, TSCWrap, WalltimeMarker, + CustomEventMarker, }; // Use 7 bits to identify this record type. /* RecordKinds */ uint8_t RecordKind : 7; diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc index e538b477a3de..a7e1382c3865 100644 --- a/lib/xray/xray_fdr_logging.cc +++ b/lib/xray/xray_fdr_logging.cc @@ -41,45 +41,12 @@ namespace __xray { // Global BufferQueue. 
std::shared_ptr BQ; -__sanitizer::atomic_sint32_t LoggingStatus = { - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; - __sanitizer::atomic_sint32_t LogFlushStatus = { XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; -std::unique_ptr FDROptions; - -XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax, - void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - if (OptionsSize != sizeof(FDRLoggingOptions)) - return static_cast(__sanitizer::atomic_load( - &LoggingStatus, __sanitizer::memory_order_acquire)); - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - if (!__sanitizer::atomic_compare_exchange_strong( - &LoggingStatus, &CurrentStatus, - XRayLogInitStatus::XRAY_LOG_INITIALIZING, - __sanitizer::memory_order_release)) - return static_cast(CurrentStatus); - - FDROptions.reset(new FDRLoggingOptions()); - memcpy(FDROptions.get(), Options, OptionsSize); - bool Success = false; - BQ = std::make_shared(BufferSize, BufferMax, Success); - if (!Success) { - Report("BufferQueue init failed.\n"); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } - - // Install the actual handleArg0 handler after initialising the buffers. - __xray_set_handler(fdrLoggingHandleArg0); +FDRLoggingOptions FDROptions; - __sanitizer::atomic_store(&LoggingStatus, - XRayLogInitStatus::XRAY_LOG_INITIALIZED, - __sanitizer::memory_order_release); - Report("XRay FDR init successful.\n"); - return XRayLogInitStatus::XRAY_LOG_INITIALIZED; -} +__sanitizer::SpinMutex FDROptionsMutex; // Must finalize before flushing. XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { @@ -108,7 +75,11 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { // (fixed-sized) and let the tools reading the buffers deal with the data // afterwards. 
// - int Fd = FDROptions->Fd; + int Fd = -1; + { + __sanitizer::SpinMutexLock Guard(&FDROptionsMutex); + Fd = FDROptions.Fd; + } if (Fd == -1) Fd = getLogFD(); if (Fd == -1) { @@ -120,8 +91,8 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { // Test for required CPU features and cache the cycle frequency static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() - : __xray::NanosecondsPerSecond; + static uint64_t CycleFrequency = + TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; XRayFileHeader Header; Header.Version = 1; @@ -192,8 +163,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT { return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } -void fdrLoggingHandleArg0(int32_t FuncId, - XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { +static std::tuple +getTimestamp() XRAY_NEVER_INSTRUMENT { // We want to get the TSC as early as possible, so that we can check whether // we've seen this CPU before. We also do it before we load anything else, to // allow for forward progress with the scheduling. 
@@ -203,7 +174,7 @@ void fdrLoggingHandleArg0(int32_t FuncId, // Test once for required CPU features static bool TSCSupported = probeRequiredCPUFeatures(); - if(TSCSupported) { + if (TSCSupported) { TSC = __xray::readTSC(CPU); } else { // FIXME: This code needs refactoring as it appears in multiple locations @@ -216,9 +187,102 @@ void fdrLoggingHandleArg0(int32_t FuncId, CPU = 0; TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; } + return std::make_tuple(TSC, CPU); +} + +void fdrLoggingHandleArg0(int32_t FuncId, + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + auto TSC_CPU = getTimestamp(); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), + std::get<1>(TSC_CPU), clock_gettime, + LoggingStatus, BQ); +} - __xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU, - clock_gettime, LoggingStatus, BQ); +void fdrLoggingHandleCustomEvent(void *Event, + std::size_t EventSize) XRAY_NEVER_INSTRUMENT { + using namespace __xray_fdr_internal; + auto TSC_CPU = getTimestamp(); + auto &TSC = std::get<0>(TSC_CPU); + auto &CPU = std::get<1>(TSC_CPU); + thread_local bool Running = false; + RecursionGuard Guard{Running}; + if (!Guard) { + assert(Running && "RecursionGuard is buggy!"); + return; + } + if (EventSize > std::numeric_limits::max()) { + using Empty = struct {}; + static Empty Once = [&] { + Report("Event size too large = %zu ; > max = %d\n", EventSize, + std::numeric_limits::max()); + return Empty(); + }(); + (void)Once; + } + int32_t ReducedEventSize = static_cast(EventSize); + if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, clock_gettime)) + return; + + // Here we need to prepare the log to handle: + // - The metadata record we're going to write. (16 bytes) + // - The additional data we're going to write. Currently, that's the size of + // the event we're going to dump into the log as free-form bytes. 
+ if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) { + LocalBQ = nullptr; + return; + } + + // Write the custom event metadata record, which consists of the following + // information: + // - 8 bytes (64-bits) for the full TSC when the event started. + // - 4 bytes (32-bits) for the length of the data. + MetadataRecord CustomEvent; + CustomEvent.Type = uint8_t(RecordType::Metadata); + CustomEvent.RecordKind = + uint8_t(MetadataRecord::RecordKinds::CustomEventMarker); + constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU)); + std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t)); + std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize); + std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent)); + RecordPtr += sizeof(CustomEvent); + std::memcpy(RecordPtr, Event, ReducedEventSize); + endBufferIfFull(); +} + +XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax, + void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + if (OptionsSize != sizeof(FDRLoggingOptions)) + return static_cast(__sanitizer::atomic_load( + &LoggingStatus, __sanitizer::memory_order_acquire)); + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + if (!__sanitizer::atomic_compare_exchange_strong( + &LoggingStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_INITIALIZING, + __sanitizer::memory_order_release)) + return static_cast(CurrentStatus); + + { + __sanitizer::SpinMutexLock Guard(&FDROptionsMutex); + memcpy(&FDROptions, Options, OptionsSize); + } + + bool Success = false; + BQ = std::make_shared(BufferSize, BufferMax, Success); + if (!Success) { + Report("BufferQueue init failed.\n"); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + + // Install the actual handleArg0 handler after initialising the buffers. 
+ __xray_set_handler(fdrLoggingHandleArg0); + __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); + + __sanitizer::atomic_store(&LoggingStatus, + XRayLogInitStatus::XRAY_LOG_INITIALIZED, + __sanitizer::memory_order_release); + Report("XRay FDR init successful.\n"); + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } } // namespace __xray diff --git a/lib/xray/xray_fdr_logging_impl.h b/lib/xray/xray_fdr_logging_impl.h index ce360cb03ea7..4a1d80fd0eba 100644 --- a/lib/xray/xray_fdr_logging_impl.h +++ b/lib/xray/xray_fdr_logging_impl.h @@ -37,6 +37,9 @@ namespace __xray { +__sanitizer::atomic_sint32_t LoggingStatus = { + XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; + /// We expose some of the state transitions when FDR logging mode is operating /// such that we can simulate a series of log events that may occur without /// and test with determinism without worrying about the real CPU time. @@ -123,12 +126,21 @@ thread_local uint8_t NumTailCalls = 0; constexpr auto MetadataRecSize = sizeof(MetadataRecord); constexpr auto FunctionRecSize = sizeof(FunctionRecord); +// We use a thread_local variable to keep track of which CPUs we've already +// run, and the TSC times for these CPUs. This allows us to stop repeating the +// CPU field in the function records. +// +// We assume that we'll support only 65536 CPUs for x86_64. +thread_local uint16_t CurrentCPU = std::numeric_limits::max(); +thread_local uint64_t LastTSC = 0; +thread_local uint64_t LastFunctionEntryTSC = 0; + class ThreadExitBufferCleanup { - std::weak_ptr Buffers; + std::shared_ptr &Buffers; BufferQueue::Buffer &Buffer; public: - explicit ThreadExitBufferCleanup(std::weak_ptr BQ, + explicit ThreadExitBufferCleanup(std::shared_ptr &BQ, BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT : Buffers(BQ), Buffer(Buffer) {} @@ -142,17 +154,24 @@ public: // the queue. 
assert((RecordPtr + MetadataRecSize) - static_cast(Buffer.Buffer) >= static_cast(MetadataRecSize)); - if (auto BQ = Buffers.lock()) { + if (Buffers) { writeEOBMetadata(); - auto EC = BQ->releaseBuffer(Buffer); + auto EC = Buffers->releaseBuffer(Buffer); if (EC != BufferQueue::ErrorCode::Ok) Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, BufferQueue::getErrorString(EC)); + Buffers = nullptr; return; } } }; +// Make sure a thread that's ever called handleArg0 has a thread-local +// live reference to the buffer queue for this particular instance of +// FDRLogging, and that we're going to clean it up when the thread exits. +thread_local std::shared_ptr LocalBQ = nullptr; +thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer); + class RecursionGuard { bool &Running; const bool Valid; @@ -176,7 +195,7 @@ public: } }; -static inline bool loggingInitialized( +inline bool loggingInitialized( const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT { return __sanitizer::atomic_load(&LoggingStatus, __sanitizer::memory_order_acquire) == @@ -185,8 +204,8 @@ static inline bool loggingInitialized( } // namespace -static inline void writeNewBufferPreamble(pid_t Tid, timespec TS, - char *&MemPtr) XRAY_NEVER_INSTRUMENT { +inline void writeNewBufferPreamble(pid_t Tid, timespec TS, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { static constexpr int InitRecordsCount = 2; std::aligned_storage::type Records[InitRecordsCount]; { @@ -222,9 +241,8 @@ static inline void writeNewBufferPreamble(pid_t Tid, timespec TS, NumTailCalls = 0; } -static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t, - struct timespec *)) - XRAY_NEVER_INSTRUMENT { +inline void setupNewBuffer(int (*wall_clock_reader)( + clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT { RecordPtr = static_cast(Buffer.Buffer); pid_t Tid = syscall(SYS_gettid); timespec TS{0, 0}; @@ -235,8 +253,8 @@ static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t, NumTailCalls 
= 0; } -static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, - char *&MemPtr) XRAY_NEVER_INSTRUMENT { +inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { MetadataRecord NewCPUId; NewCPUId.Type = uint8_t(RecordType::Metadata); NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId); @@ -253,12 +271,12 @@ static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, NumTailCalls = 0; } -static inline void writeNewCPUIdMetadata(uint16_t CPU, - uint64_t TSC) XRAY_NEVER_INSTRUMENT { +inline void writeNewCPUIdMetadata(uint16_t CPU, + uint64_t TSC) XRAY_NEVER_INSTRUMENT { writeNewCPUIdMetadata(CPU, TSC, RecordPtr); } -static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT { +inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT { MetadataRecord EOBMeta; EOBMeta.Type = uint8_t(RecordType::Metadata); EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer); @@ -269,12 +287,12 @@ static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT { NumTailCalls = 0; } -static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { +inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { writeEOBMetadata(RecordPtr); } -static inline void writeTSCWrapMetadata(uint64_t TSC, - char *&MemPtr) XRAY_NEVER_INSTRUMENT { +inline void writeTSCWrapMetadata(uint64_t TSC, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { MetadataRecord TSCWrap; TSCWrap.Type = uint8_t(RecordType::Metadata); TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap); @@ -289,13 +307,13 @@ static inline void writeTSCWrapMetadata(uint64_t TSC, NumTailCalls = 0; } -static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { +inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { writeTSCWrapMetadata(TSC, RecordPtr); } -static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, - XRayEntryType EntryType, - char *&MemPtr) 
XRAY_NEVER_INSTRUMENT { +inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, + XRayEntryType EntryType, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { std::aligned_storage::type AlignedFuncRecordBuffer; auto &FuncRecord = @@ -339,6 +357,17 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionTailExit); break; + case XRayEntryType::CUSTOM_EVENT: { + // This is a bug in patching, so we'll report it once and move on. + static bool Once = [&] { + Report("Internal error: patched an XRay custom event call as a function; " + "func id = %d\n", + FuncId); + return true; + }(); + (void)Once; + return; + } } std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); @@ -346,8 +375,9 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, } static uint64_t thresholdTicks() { - static uint64_t TicksPerSec = probeRequiredCPUFeatures() ? getTSCFrequency() : - __xray::NanosecondsPerSecond; + static uint64_t TicksPerSec = probeRequiredCPUFeatures() + ? 
getTSCFrequency() + : __xray::NanosecondsPerSecond; static const uint64_t ThresholdTicks = TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000; return ThresholdTicks; @@ -397,9 +427,8 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, RewindingRecordPtr -= FunctionRecSize; RewindingTSC -= ExpectedTailExit.TSCDelta; AlignedFuncStorage FunctionEntryBuffer; - const auto &ExpectedFunctionEntry = - *reinterpret_cast(std::memcpy( - &FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize)); + const auto &ExpectedFunctionEntry = *reinterpret_cast( + std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize)); assert(ExpectedFunctionEntry.RecordKind == uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && "Expected to find function entry when rewinding tail call."); @@ -422,7 +451,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, } } -static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) { +inline bool releaseThreadLocalBuffer(BufferQueue *BQ) { auto EC = BQ->releaseBuffer(Buffer); if (EC != BufferQueue::ErrorCode::Ok) { Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, @@ -432,11 +461,29 @@ static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) { return true; } -static inline void processFunctionHook( - int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU, - int (*wall_clock_reader)(clockid_t, struct timespec *), - __sanitizer::atomic_sint32_t &LoggingStatus, - const std::shared_ptr &BQ) XRAY_NEVER_INSTRUMENT { +inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t, + struct timespec *), + size_t MaxSize) XRAY_NEVER_INSTRUMENT { + char *BufferStart = static_cast(Buffer.Buffer); + if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) { + writeEOBMetadata(); + if (!releaseThreadLocalBuffer(LocalBQ.get())) + return false; + auto EC = LocalBQ->getBuffer(Buffer); + if (EC != BufferQueue::ErrorCode::Ok) { + Report("Failed to acquire a 
buffer; error=%s\n", + BufferQueue::getErrorString(EC)); + return false; + } + setupNewBuffer(wall_clock_reader); + } + return true; +} + +inline bool isLogInitializedAndReady( + std::shared_ptr &LocalBQ, uint64_t TSC, unsigned char CPU, + int (*wall_clock_reader)(clockid_t, + struct timespec *)) XRAY_NEVER_INSTRUMENT { // Bail out right away if logging is not initialized yet. // We should take the opportunity to release the buffer though. auto Status = __sanitizer::atomic_load(&LoggingStatus, @@ -446,44 +493,19 @@ static inline void processFunctionHook( (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) { writeEOBMetadata(); - if (!releaseThreadLocalBuffer(BQ.get())) - return; + if (!releaseThreadLocalBuffer(LocalBQ.get())) + return false; RecordPtr = nullptr; + LocalBQ = nullptr; + return false; } - return; - } - - // We use a thread_local variable to keep track of which CPUs we've already - // run, and the TSC times for these CPUs. This allows us to stop repeating the - // CPU field in the function records. - // - // We assume that we'll support only 65536 CPUs for x86_64. - thread_local uint16_t CurrentCPU = std::numeric_limits::max(); - thread_local uint64_t LastTSC = 0; - thread_local uint64_t LastFunctionEntryTSC = 0; - - // Make sure a thread that's ever called handleArg0 has a thread-local - // live reference to the buffer queue for this particular instance of - // FDRLogging, and that we're going to clean it up when the thread exits. - thread_local auto LocalBQ = BQ; - thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer); - - // Prevent signal handler recursion, so in case we're already in a log writing - // mode and the signal handler comes in (and is also instrumented) then we - // don't want to be clobbering potentially partial writes already happening in - // the thread. We use a simple thread_local latch to only allow one on-going - // handleArg0 to happen at any given time. 
- thread_local bool Running = false; - RecursionGuard Guard{Running}; - if (!Guard) { - assert(Running == true && "RecursionGuard is buggy!"); - return; + return false; } if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) { writeEOBMetadata(); - if (!releaseThreadLocalBuffer(BQ.get())) - return; + if (!releaseThreadLocalBuffer(LocalBQ.get())) + return false; RecordPtr = nullptr; } @@ -496,19 +518,57 @@ static inline void processFunctionHook( LS != XRayLogInitStatus::XRAY_LOG_FINALIZED) Report("Failed to acquire a buffer; error=%s\n", BufferQueue::getErrorString(EC)); - return; + return false; } setupNewBuffer(wall_clock_reader); } if (CurrentCPU == std::numeric_limits::max()) { - // This means this is the first CPU this thread has ever run on. We set the - // current CPU and record this as the first TSC we've seen. + // This means this is the first CPU this thread has ever run on. We set + // the current CPU and record this as the first TSC we've seen. CurrentCPU = CPU; writeNewCPUIdMetadata(CPU, TSC); } + return true; +} // namespace __xray_fdr_internal + +inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT { + auto BufferStart = static_cast(Buffer.Buffer); + if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) { + writeEOBMetadata(); + if (!releaseThreadLocalBuffer(LocalBQ.get())) + return; + RecordPtr = nullptr; + } +} + +inline void processFunctionHook( + int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU, + int (*wall_clock_reader)(clockid_t, struct timespec *), + __sanitizer::atomic_sint32_t &LoggingStatus, + const std::shared_ptr &BQ) XRAY_NEVER_INSTRUMENT { + // Prevent signal handler recursion, so in case we're already in a log writing + // mode and the signal handler comes in (and is also instrumented) then we + // don't want to be clobbering potentially partial writes already happening in + // the thread. 
We use a simple thread_local latch to only allow one on-going + // handleArg0 to happen at any given time. + thread_local bool Running = false; + RecursionGuard Guard{Running}; + if (!Guard) { + assert(Running == true && "RecursionGuard is buggy!"); + return; + } + + // In case the reference has been cleaned up before, we make sure we + // initialize it to the provided BufferQueue. + if (LocalBQ == nullptr) + LocalBQ = BQ; + + if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader)) + return; + // Before we go setting up writing new function entries, we need to be really // careful about the pointer math we're doing. This means we need to ensure // that the record we are about to write is going to fit into the buffer, @@ -545,25 +605,15 @@ static inline void processFunctionHook( // bytes in the end of the buffer, we need to write out the EOB, get a new // Buffer, set it up properly before doing any further writing. // - char *BufferStart = static_cast(Buffer.Buffer); - if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart < - static_cast(MetadataRecSize)) { - writeEOBMetadata(); - if (!releaseThreadLocalBuffer(LocalBQ.get())) - return; - auto EC = LocalBQ->getBuffer(Buffer); - if (EC != BufferQueue::ErrorCode::Ok) { - Report("Failed to acquire a buffer; error=%s\n", - BufferQueue::getErrorString(EC)); - return; - } - setupNewBuffer(wall_clock_reader); + if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) { + LocalBQ = nullptr; + return; } // By this point, we are now ready to write at most 24 bytes (one metadata // record and one function record). 
- BufferStart = static_cast(Buffer.Buffer); - assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >= + assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - + static_cast(Buffer.Buffer) >= static_cast(MetadataRecSize) && "Misconfigured BufferQueue provided; Buffer size not large enough."); @@ -586,7 +636,6 @@ static inline void processFunctionHook( // FunctionRecord. In this case we write down just a FunctionRecord with // the correct TSC delta. // - uint32_t RecordTSCDelta = 0; if (CPU != CurrentCPU) { // We've moved to a new CPU. @@ -619,21 +668,27 @@ static inline void processFunctionHook( break; rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId); return; // without writing log. + case XRayEntryType::CUSTOM_EVENT: { + // This is a bug in patching, so we'll report it once and move on. + static bool Once = [&] { + Report("Internal error: patched an XRay custom event call as a function; " + "func id = %d", + FuncId); + return true; + }(); + (void)Once; + return; + } } writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr); // If we've exhausted the buffer by this time, we then release the buffer to // make sure that other threads may start using this buffer. - if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) { - writeEOBMetadata(); - if (!releaseThreadLocalBuffer(LocalBQ.get())) - return; - RecordPtr = nullptr; - } + endBufferIfFull(); } } // namespace __xray_fdr_internal - } // namespace __xray + #endif // XRAY_XRAY_FDR_LOGGING_IMPL_H diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc index 26f0ab122db2..c437a72e3f05 100644 --- a/lib/xray/xray_interface.cc +++ b/lib/xray/xray_interface.cc @@ -50,6 +50,9 @@ __sanitizer::atomic_uintptr_t XRayPatchedFunction{0}; // This is the function to call from the arg1-enabled sleds/trampolines. __sanitizer::atomic_uintptr_t XRayArgLogger{0}; +// This is the function to call when we encounter a custom event log call. 
+__sanitizer::atomic_uintptr_t XRayPatchedCustomEvent{0}; + // MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo // any successful mprotect(...) changes. This is used to make a page writeable // and executable, and upon destruction if it was successful in doing so returns @@ -97,7 +100,19 @@ int __xray_set_handler(void (*entry)(int32_t, __sanitizer::memory_order_acquire)) { __sanitizer::atomic_store(&__xray::XRayPatchedFunction, - reinterpret_cast(entry), + reinterpret_cast(entry), + __sanitizer::memory_order_release); + return 1; + } + return 0; +} + +int __xray_set_customevent_handler(void (*entry)(void *, size_t)) + XRAY_NEVER_INSTRUMENT { + if (__sanitizer::atomic_load(&XRayInitialized, + __sanitizer::memory_order_acquire)) { + __sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent, + reinterpret_cast(entry), __sanitizer::memory_order_release); return 1; } @@ -161,6 +176,9 @@ inline bool patchSled(const XRaySledEntry &Sled, bool Enable, case XRayEntryType::LOG_ARGS_ENTRY: Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry); break; + case XRayEntryType::CUSTOM_EVENT: + Success = patchCustomEvent(Enable, FuncId, Sled); + break; default: Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind)); return false; @@ -301,6 +319,7 @@ int __xray_set_handler_arg1(void (*Handler)(int32_t, XRayEntryType, uint64_t)) { __sanitizer::memory_order_release); return 1; } + int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); } uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h index ef0c6b15809b..4a2784612fcb 100644 --- a/lib/xray/xray_interface_internal.h +++ b/lib/xray/xray_interface_internal.h @@ -60,6 +60,7 @@ bool patchFunctionEntry(bool Enable, uint32_t FuncId, bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); bool patchFunctionTailExit(bool 
Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); } // namespace __xray @@ -70,6 +71,7 @@ extern void __xray_FunctionEntry(); extern void __xray_FunctionExit(); extern void __xray_FunctionTailExit(); extern void __xray_ArgLoggerEntry(); +extern void __xray_CustomEvent(); } #endif diff --git a/lib/xray/xray_mips.cc b/lib/xray/xray_mips.cc index c8ff39936c5a..cd863304db29 100644 --- a/lib/xray/xray_mips.cc +++ b/lib/xray/xray_mips.cc @@ -95,7 +95,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, // B #44 if (Enable) { - uint32_t LoTracingHookAddr = reinterpret_cast(TracingHook) & 0xffff; + uint32_t LoTracingHookAddr = + reinterpret_cast(TracingHook) & 0xffff; uint32_t HiTracingHookAddr = (reinterpret_cast(TracingHook) >> 16) & 0xffff; uint32_t LoFunctionID = FuncId & 0xffff; @@ -151,6 +152,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips? 
+ return false; +} + } // namespace __xray extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { diff --git a/lib/xray/xray_mips64.cc b/lib/xray/xray_mips64.cc index 21136848c8af..fa8fdd5abccc 100644 --- a/lib/xray/xray_mips64.cc +++ b/lib/xray/xray_mips64.cc @@ -93,7 +93,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, if (Enable) { uint32_t LoTracingHookAddr = reinterpret_cast(TracingHook) & 0xffff; - uint32_t HiTracingHookAddr = (reinterpret_cast(TracingHook) >> 16) & 0xffff; + uint32_t HiTracingHookAddr = + (reinterpret_cast(TracingHook) >> 16) & 0xffff; uint32_t HigherTracingHookAddr = (reinterpret_cast(TracingHook) >> 32) & 0xffff; uint32_t HighestTracingHookAddr = @@ -160,6 +161,11 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips64? + return false; +} } // namespace __xray extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { diff --git a/lib/xray/xray_powerpc64.cc b/lib/xray/xray_powerpc64.cc index 6a7554cfc1b6..ab03cb10042f 100644 --- a/lib/xray/xray_powerpc64.cc +++ b/lib/xray/xray_powerpc64.cc @@ -93,6 +93,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, // FIXME: Maybe implement this better? bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in powerpc64? 
+ return false; +} + } // namespace __xray extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { diff --git a/lib/xray/xray_trampoline_powerpc64_asm.S b/lib/xray/xray_trampoline_powerpc64_asm.S index d43231ead22c..250e2e5be67a 100644 --- a/lib/xray/xray_trampoline_powerpc64_asm.S +++ b/lib/xray/xray_trampoline_powerpc64_asm.S @@ -145,27 +145,91 @@ __xray_FunctionEntry: .p2align 4 __xray_FunctionExit: std 0, 16(1) - ld 0, -8(1) # FuncId - stdu 1, -72(1) -# Spill r3, f1, and vsr34, the return value registers. + stdu 1, -256(1) +# Spill r3-r4, f1-f8, and vsr34-vsr41, which are return registers. +# If this appears to be slow, the caller needs to pass in number of generic, +# floating point, and vector parameters, so that we only spill those live ones. std 3, 32(1) - mr 3, 0 - addi 4, 1, 40 - stxsdx 1, 0, 4 + ld 3, 248(1) # FuncId + std 4, 40(1) addi 4, 1, 48 + stxsdx 1, 0, 4 + addi 4, 1, 56 + stxsdx 2, 0, 4 + addi 4, 1, 64 + stxsdx 3, 0, 4 + addi 4, 1, 72 + stxsdx 4, 0, 4 + addi 4, 1, 80 + stxsdx 5, 0, 4 + addi 4, 1, 88 + stxsdx 6, 0, 4 + addi 4, 1, 96 + stxsdx 7, 0, 4 + addi 4, 1, 104 + stxsdx 8, 0, 4 + addi 4, 1, 112 stxvd2x 34, 0, 4 + addi 4, 1, 128 + stxvd2x 35, 0, 4 + addi 4, 1, 144 + stxvd2x 36, 0, 4 + addi 4, 1, 160 + stxvd2x 37, 0, 4 + addi 4, 1, 176 + stxvd2x 38, 0, 4 + addi 4, 1, 192 + stxvd2x 39, 0, 4 + addi 4, 1, 208 + stxvd2x 40, 0, 4 + addi 4, 1, 224 + stxvd2x 41, 0, 4 + std 2, 240(1) mflr 0 - std 0, 64(1) + std 0, 248(1) + li 4, 1 bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType nop - ld 0, 64(1) - mtlr 0 - ld 3, 32(1) - addi 4, 1, 40 - lxsdx 1, 0, 4 + addi 4, 1, 48 + lxsdx 1, 0, 4 + addi 4, 1, 56 + lxsdx 2, 0, 4 + addi 4, 1, 64 + lxsdx 3, 0, 4 + addi 4, 1, 72 + lxsdx 4, 0, 4 + addi 4, 1, 80 + lxsdx 5, 0, 4 + addi 4, 1, 88 + lxsdx 6, 0, 4 + addi 4, 1, 96 + lxsdx 7, 0, 4 + addi 4, 1, 104 + lxsdx 8, 0, 4 + addi 4, 1, 112 lxvd2x 34, 0, 4 - addi 1, 1, 72 + addi 4, 1, 128 + lxvd2x 35, 0, 4 + addi 4, 1, 144 + lxvd2x 36, 0, 4 + addi 4, 1, 160 + 
lxvd2x 37, 0, 4 + addi 4, 1, 176 + lxvd2x 38, 0, 4 + addi 4, 1, 192 + lxvd2x 39, 0, 4 + addi 4, 1, 208 + lxvd2x 40, 0, 4 + addi 4, 1, 224 + lxvd2x 41, 0, 4 + ld 0, 248(1) + mtlr 0 + ld 2, 240(1) + ld 3, 32(1) + ld 4, 40(1) + + addi 1, 1, 256 ld 0, 16(1) blr diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S index 847ecef8d425..b59eedc4bb1b 100644 --- a/lib/xray/xray_trampoline_x86_64.S +++ b/lib/xray/xray_trampoline_x86_64.S @@ -176,9 +176,15 @@ __xray_ArgLoggerEntry: je .Larg1entryFail .Larg1entryLog: - movq %rdi, %rdx // first argument will become the third - xorq %rsi, %rsi // XRayEntryType::ENTRY into the second - movl %r10d, %edi // 32-bit function ID becomes the first + + // First argument will become the third + movq %rdi, %rdx + + // XRayEntryType::ENTRY into the second + xorq %rsi, %rsi + + // 32-bit function ID becomes the first + movl %r10d, %edi callq *%rax .Larg1entryFail: @@ -189,4 +195,38 @@ __xray_ArgLoggerEntry: .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry .cfi_endproc +//===----------------------------------------------------------------------===// + + .global __xray_CustomEvent + .align 16, 0x90 + .type __xray_CustomEvent,@function +__xray_CustomEvent: + .cfi_startproc + subq $16, %rsp + .cfi_def_cfa_offset 24 + movq %rbp, 8(%rsp) + movq %rax, 0(%rsp) + + // We take two arguments to this trampoline, which should be in rdi and rsi + // already. We also make sure that we stash %rax because we use that register + // to call the logging handler. + movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax + testq %rax,%rax + je .LcustomEventCleanup + + // At this point we know that rcx and rdx already has the data, so we just + // call the logging handler. 
+ callq *%rax + +.LcustomEventCleanup: + movq 0(%rsp), %rax + movq 8(%rsp), %rbp + addq $16, %rsp + .cfi_def_cfa_offset 8 + retq + +.Ltmp8: + .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent + .cfi_endproc + NO_EXEC_STACK_DIRECTIVE diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc index 2e9a8d270c33..e34806fa1cea 100644 --- a/lib/xray/xray_x86_64.cc +++ b/lib/xray/xray_x86_64.cc @@ -75,8 +75,10 @@ uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { static constexpr uint8_t CallOpCode = 0xe8; static constexpr uint16_t MovR10Seq = 0xba41; static constexpr uint16_t Jmp9Seq = 0x09eb; +static constexpr uint16_t Jmp20Seq = 0x14eb; static constexpr uint8_t JmpOpCode = 0xe9; static constexpr uint8_t RetOpCode = 0xc3; +static constexpr uint16_t NopwSeq = 0x9066; static constexpr int64_t MinOffset{std::numeric_limits::min()}; static constexpr int64_t MaxOffset{std::numeric_limits::max()}; @@ -201,6 +203,40 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, return true; } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +19 // 2 bytes + // ... + // + // With the following: + // + // nopw // 2 bytes* + // ... + // + // We need to do this in the following order: + // + // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is + // the relative offset to the __xray_CustomEvent trampoline. + // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'. + // This allows us to "enable" this code once the changes have committed. + // + // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'. 
+ // + if (Enable) { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), NopwSeq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp20Seq, + std::memory_order_release); + } + return false; +} + // We determine whether the CPU we're running on has the correct features we // need. In x86_64 this will be rdtscp support. bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { diff --git a/test/asan/CMakeLists.txt b/test/asan/CMakeLists.txt index b8e365227780..87fa9d138748 100644 --- a/test/asan/CMakeLists.txt +++ b/test/asan/CMakeLists.txt @@ -3,9 +3,17 @@ set(ASAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(ASAN_TESTSUITES) set(ASAN_DYNAMIC_TESTSUITES) -# FIXME: Shadow memory for 64-bit asan easily exhausts swap on most machines. -# Find a way to make these tests pass reliably, and re-enable them. -if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +# Before Windows 8 (CMAKE_SYSTEM_VERSION 6.2), reserving large regions of shadow +# memory allocated physical memory for page tables, which made it very +# unreliable. Remove the asan tests from check-all in this configuration. +set(SHADOW_MAPPING_UNRELIABLE FALSE) +if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND + ${CMAKE_SYSTEM_VERSION} LESS 6.2) + set(SHADOW_MAPPING_UNRELIABLE TRUE) + message(WARNING "Disabling ASan tests because they are unreliable on Windows 7 and earlier") +endif() + +if (SHADOW_MAPPING_UNRELIABLE) set(EXCLUDE_FROM_ALL TRUE) endif() @@ -165,7 +173,6 @@ if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME) endif() # Reset EXCLUDE_FROM_ALL to its initial value. -# FIXME: Remove when we run Win64 asan tests. 
-if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +if (SHADOW_MAPPING_UNRELIABLE) set(EXCLUDE_FROM_ALL FALSE) endif() diff --git a/test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc b/test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc new file mode 100644 index 000000000000..a845721d5982 --- /dev/null +++ b/test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc @@ -0,0 +1,30 @@ +// REQUIRES: x86_64-target-arch +// RUN: %clangxx_asan %s -o %t +// RUN: not %run %t 2>&1 | FileCheck %s +#include +#include +#include +#include +#include + +int main() { + __sanitizer_sandbox_arguments args = {0}; + // should cache /proc/self/maps + __sanitizer_sandbox_on_notify(&args); + + if (unshare(CLONE_NEWUSER)) { + printf("unshare failed\n"); + abort(); + } + + // remove access to /proc/self/maps + if (chroot("/tmp")) { + printf("chroot failed\n"); + abort(); + } + + *(volatile int*)0x42 = 0; +// CHECK: AddressSanitizer: SEGV on unknown address 0x000000000042 +// CHECK-NOT: AddressSanitizer CHECK failed +// CHECK: SUMMARY: AddressSanitizer: SEGV +} diff --git a/test/asan/TestCases/Posix/strndup_oob_test.cc b/test/asan/TestCases/Posix/strndup_oob_test.cc new file mode 100644 index 000000000000..7ea0b7a33400 --- /dev/null +++ b/test/asan/TestCases/Posix/strndup_oob_test.cc @@ -0,0 +1,27 @@ +// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s + +// When built as C on Linux, strndup is transformed to __strndup. +// RUN: %clangxx_asan -O3 -xc %s -o %t && not %run %t 2>&1 | FileCheck %s + +// Unwind problem on arm: "main" is missing from the allocation stack trace. 
+// UNSUPPORTED: win32,s390,armv7l-unknown-linux-gnueabihf + +#include + +char kString[] = "foo"; + +int main(int argc, char **argv) { + char *copy = strndup(kString, 2); + int x = copy[2 + argc]; // BOOM + // CHECK: AddressSanitizer: heap-buffer-overflow + // CHECK: #0 {{.*}}main {{.*}}strndup_oob_test.cc:[[@LINE-2]] + // CHECK-LABEL: allocated by thread T{{.*}} here: + // CHECK: #{{[01]}} {{.*}}strndup + // CHECK: #{{.*}}main {{.*}}strndup_oob_test.cc:[[@LINE-6]] + // CHECK-LABEL: SUMMARY + // CHECK: strndup_oob_test.cc:[[@LINE-7]] + return x; +} diff --git a/test/asan/lit.cfg b/test/asan/lit.cfg index b433a91e830e..063c33b02697 100644 --- a/test/asan/lit.cfg +++ b/test/asan/lit.cfg @@ -38,6 +38,11 @@ if config.host_os == 'Darwin': # Also, make sure we do not overwhelm the syslog while testing. default_asan_opts = 'abort_on_error=0' default_asan_opts += ':log_to_syslog=0' + + # On Darwin, leak checking is not enabled by default. Enable for x86_64 + # tests to prevent regressions + if config.target_arch == 'x86_64': + default_asan_opts += ':detect_leaks=1' elif config.android: # The same as on Darwin, we default to "abort_on_error=1" which slows down # testing. Also, all existing tests are using "not" instead of "not --crash" @@ -215,7 +220,9 @@ if re.search('mthumb', config.target_cflags) is not None: config.available_features.add('fast-unwinder-works') # Turn on leak detection on 64-bit Linux. -if config.host_os == 'Linux' and (config.target_arch == 'x86_64' or config.target_arch == 'i386'): +leak_detection_linux = (config.host_os == 'Linux') and (config.target_arch == 'x86_64' or config.target_arch == 'i386') +leak_detection_mac = (config.host_os == 'Darwin') and (config.target_arch == 'x86_64') +if leak_detection_linux or leak_detection_mac: config.available_features.add('leak-detection') # Set LD_LIBRARY_PATH to pick dynamic runtime up properly. 
diff --git a/test/dfsan/custom.cc b/test/dfsan/custom.cc index c96d94053986..b36db01bc48c 100644 --- a/test/dfsan/custom.cc +++ b/test/dfsan/custom.cc @@ -3,7 +3,7 @@ // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES -mllvm -dfsan-args-abi %s -o %t && %run %t -// XFAIL: target-is-mips64el +// XFAIL: target-is-mips64,target-is-mips64el // Tests custom implementations of various glibc functions. diff --git a/test/lsan/TestCases/link_turned_off.cc b/test/lsan/TestCases/link_turned_off.cc index b8458de63ddd..fd11272ceae3 100644 --- a/test/lsan/TestCases/link_turned_off.cc +++ b/test/lsan/TestCases/link_turned_off.cc @@ -3,6 +3,8 @@ // RUN: %clangxx_lsan %s -o %t // RUN: %env_lsan_opts=$LSAN_BASE %run %t // RUN: %env_lsan_opts=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s +// +// UNSUPPORTED: darwin #include diff --git a/test/lsan/TestCases/recoverable_leak_check.cc b/test/lsan/TestCases/recoverable_leak_check.cc index 909698561e82..85988e2c1c70 100644 --- a/test/lsan/TestCases/recoverable_leak_check.cc +++ b/test/lsan/TestCases/recoverable_leak_check.cc @@ -3,6 +3,8 @@ // RUN: %clangxx_lsan %s -o %t // RUN: %env_lsan_opts=$LSAN_BASE %run %t foo 2>&1 | FileCheck %s // RUN: %env_lsan_opts=$LSAN_BASE %run %t 2>&1 | FileCheck %s +// +// UNSUPPORTED: darwin #include #include diff --git a/test/lsan/lit.common.cfg b/test/lsan/lit.common.cfg index da439d4c0282..309e8f27be66 100644 --- a/test/lsan/lit.common.cfg +++ b/test/lsan/lit.common.cfg @@ -67,9 +67,10 @@ config.substitutions.append( ("%clangxx ", build_invocation(clang_cxxflags)) ) config.substitutions.append( ("%clang_lsan ", build_invocation(clang_lsan_cflags)) ) config.substitutions.append( ("%clangxx_lsan ", build_invocation(clang_lsan_cxxflags)) ) -# LeakSanitizer tests are currently supported on x86-64 Linux, PowerPC64 Linux, arm Linux, and mips64 Linux only. 
+# LeakSanitizer tests are currently supported on x86-64 Linux, PowerPC64 Linux, arm Linux, mips64 Linux, and x86_64 Darwin. supported_linux = config.host_os is 'Linux' and config.host_arch in ['x86_64', 'ppc64', 'mips64', 'arm', 'armhf', 'armv7l'] -if not (supported_linux): +supported_darwin = config.host_os is 'Darwin' and config.target_arch is 'x86_64' +if not (supported_linux or supported_darwin): config.unsupported = True # Don't support Thumb due to broken fast unwinder diff --git a/test/msan/chained_origin_memcpy.cc b/test/msan/chained_origin_memcpy.cc index bfe50dfec3f5..0c94f2b13f17 100644 --- a/test/msan/chained_origin_memcpy.cc +++ b/test/msan/chained_origin_memcpy.cc @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { // CHECK: Uninitialized value was stored to memory at // CHECK-FULL-STACK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:}}[[@LINE-15]] -// CHECK-SHORT-STACK: {{#0 .* in __msan_memcpy .*msan_interceptors.cc:}} +// CHECK-SHORT-STACK: {{#0 .* in __msan_memcpy.*msan_interceptors.cc:}} // CHECK: Uninitialized value was stored to memory at // CHECK-FULL-STACK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:}}[[@LINE-29]] diff --git a/test/msan/pr32842.c b/test/msan/pr32842.c new file mode 100644 index 000000000000..b0a05f751d55 --- /dev/null +++ b/test/msan/pr32842.c @@ -0,0 +1,22 @@ +// Regression test for https://bugs.llvm.org/show_bug.cgi?id=32842 +// +// RUN: %clang_msan -g %s -o %t +// RUN: not %run %t 2>&1 | FileCheck %s + +struct iphdr { + unsigned char pad1: 2, ihl:4, pad2: 2; +}; + +int raw_send_hdrinc(unsigned long int length) { + struct iphdr iph; + if (iph.ihl * 4 > length) { + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) { + return raw_send_hdrinc(12); +} + +// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value diff --git a/test/msan/strndup.cc b/test/msan/strndup.cc new file mode 100644 index 000000000000..d4b9af1a9a6e --- /dev/null +++ b/test/msan/strndup.cc @@ -0,0 +1,28 @@ +// RUN: %clangxx_msan %s -o %t && 
not %run %t 2>&1 | FileCheck --check-prefix=ON %s +// RUN: %clangxx_msan %s -o %t && MSAN_OPTIONS=intercept_strndup=0 %run %t 2>&1 | FileCheck --check-prefix=OFF --allow-empty %s + +// When built as C on Linux, strndup is transformed to __strndup. +// RUN: %clangxx_msan -O3 -xc %s -o %t && not %run %t 2>&1 | FileCheck --check-prefix=ON %s + +// UNSUPPORTED: win32 + +#include +#include +#include +#include + +int main(int argc, char **argv) { + char kString[4] = "abc"; + __msan_poison(kString + 2, 1); + char *copy = strndup(kString, 4); // BOOM + assert(__msan_test_shadow(copy, 4) == 2); // Poisoning is preserved. + free(copy); + return 0; + // ON: Uninitialized bytes in __interceptor_{{(__)?}}strndup at offset 2 inside [{{.*}}, 4) + // ON: MemorySanitizer: use-of-uninitialized-value + // ON: #0 {{.*}}main {{.*}}strndup.cc:[[@LINE-6]] + // ON-LABEL: SUMMARY + // ON: {{.*}}strndup.cc:[[@LINE-8]] + // OFF-NOT: MemorySanitizer +} + diff --git a/test/msan/wcsncpy.cc b/test/msan/wcsncpy.cc index f582c37b7aab..6471371de980 100644 --- a/test/msan/wcsncpy.cc +++ b/test/msan/wcsncpy.cc @@ -1,6 +1,8 @@ // RUN: %clangxx_msan -fsanitize-memory-track-origins -O0 %s -o %t && not %run %t >%t.out 2>&1 // RUN: FileCheck %s < %t.out && FileCheck %s < %t.out +// XFAIL: mips + #include #include @@ -27,12 +29,12 @@ int main() { } // CHECK: Uninitialized bytes in __msan_check_mem_is_initialized // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value -// CHECK: in main {{.*}}wcsncpy.cc:26 +// CHECK: in main {{.*}}wcsncpy.cc:28 // CHECK: Uninitialized value was stored to memory at // CHECK: in {{[^\s]*}}wcsncpy -// CHECK: in main {{.*}}wcsncpy.cc:25 +// CHECK: in main {{.*}}wcsncpy.cc:27 // CHECK: Memory was marked as uninitialized // CHECK: in __msan_allocated_memory -// CHECK: in main {{.*}}wcsncpy.cc:23 +// CHECK: in main {{.*}}wcsncpy.cc:25 diff --git a/test/safestack/canary.c b/test/safestack/canary.c index c6b81f24327f..1ceaa50656f6 100644 --- a/test/safestack/canary.c +++ 
b/test/safestack/canary.c @@ -2,7 +2,8 @@ // RUN: %run %t.nossp 2>&1 | FileCheck --check-prefix=NOSSP %s // RUN: %clang_safestack -fstack-protector-all -D_FORTIFY_SOURCE=0 -g %s -o %t.ssp -// RUN: not --crash %run %t.ssp 2>&1 | FileCheck -check-prefix=SSP %s +// RUN: env LIBC_FATAL_STDERR_=1 not --crash %run %t.ssp 2>&1 | \ +// RUN: FileCheck -check-prefix=SSP %s // Test stack canaries on the unsafe stack. diff --git a/test/sanitizer_common/TestCases/Linux/getpwnam_r_invalid_user.cc b/test/sanitizer_common/TestCases/Linux/getpwnam_r_invalid_user.cc index c0d6cfea1fbe..5bee1fb4bc93 100644 --- a/test/sanitizer_common/TestCases/Linux/getpwnam_r_invalid_user.cc +++ b/test/sanitizer_common/TestCases/Linux/getpwnam_r_invalid_user.cc @@ -1,6 +1,8 @@ // Regression test for a crash in getpwnam_r and similar interceptors. // RUN: %clangxx -O0 -g %s -o %t && %run %t +// XFAIL: mips + #include #include #include diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc index 8430539829b0..8751930345e5 100644 --- a/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc +++ b/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc @@ -2,7 +2,7 @@ // // REQUIRES: has_sancovcc,stable-runtime // UNSUPPORTED: i386-darwin -// XFAIL: tsan,powerpc64,s390x,mips +// XFAIL: tsan // // RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc,bb,no-prune 2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 3 // RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc,bb 2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 2 diff --git a/test/ubsan/TestCases/Misc/missing_return.cpp b/test/ubsan/TestCases/Misc/missing_return.cpp index 7b56b97048e3..5c5b286f1a65 100644 --- a/test/ubsan/TestCases/Misc/missing_return.cpp +++ b/test/ubsan/TestCases/Misc/missing_return.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsanitize=return -g %s -O3 -o %t +// RUN: %clangxx 
-fsanitize=return %gmlt %s -O3 -o %t // RUN: not %run %t 2>&1 | FileCheck %s // RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-STACKTRACE diff --git a/test/ubsan/TestCases/TypeCheck/misaligned.cpp b/test/ubsan/TestCases/TypeCheck/misaligned.cpp index b3ff3588ba28..4eaedf37e565 100644 --- a/test/ubsan/TestCases/TypeCheck/misaligned.cpp +++ b/test/ubsan/TestCases/TypeCheck/misaligned.cpp @@ -1,8 +1,4 @@ -// FIXME: This test currently fails on Windows because we use the MSVC linker, -// which throws away DWARF debug info. -// XFAIL: win32 -// -// RUN: %clangxx -fsanitize=alignment -g %s -O3 -o %t +// RUN: %clangxx %gmlt -fsanitize=alignment %s -O3 -o %t // RUN: %run %t l0 && %run %t s0 && %run %t r0 && %run %t m0 && %run %t f0 && %run %t n0 && %run %t u0 // RUN: %run %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD --strict-whitespace // RUN: %run %t s1 2>&1 | FileCheck %s --check-prefix=CHECK-STORE diff --git a/test/ubsan/lit.common.cfg b/test/ubsan/lit.common.cfg index cd6d209ee4a9..e3a1367e748a 100644 --- a/test/ubsan/lit.common.cfg +++ b/test/ubsan/lit.common.cfg @@ -38,6 +38,9 @@ else: lit_config.fatal("Unknown UBSan test mode: %r" % ubsan_lit_test_mode) # Platform-specific default for lit tests. +if config.target_arch == 's390x': + # On SystemZ we need -mbackchain to make the fast unwinder work. + clang_ubsan_cflags.append("-mbackchain") if config.host_os == 'Darwin': # On Darwin, we default to `abort_on_error=1`, which would make tests run # much slower. Let's override this and run lit tests with 'abort_on_error=0'. @@ -61,6 +64,7 @@ clang_ubsan_cxxflags = config.cxx_mode_flags + clang_ubsan_cflags # Define %clang and %clangxx substitutions to use in test RUN lines. 
config.substitutions.append( ("%clang ", build_invocation(clang_ubsan_cflags)) ) config.substitutions.append( ("%clangxx ", build_invocation(clang_ubsan_cxxflags)) ) +config.substitutions.append( ("%gmlt ", " ".join(config.debug_info_flags) + " ") ) # Default test suffixes. config.suffixes = ['.c', '.cc', '.cpp'] diff --git a/test/xray/TestCases/Linux/coverage-sample.cc b/test/xray/TestCases/Linux/coverage-sample.cc index df23d9f738de..623b4e34541b 100644 --- a/test/xray/TestCases/Linux/coverage-sample.cc +++ b/test/xray/TestCases/Linux/coverage-sample.cc @@ -2,9 +2,6 @@ // // RUN: %clangxx_xray -std=c++11 %s -o %t // RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false" %run %t | FileCheck %s -// FIXME: When run this test case causes a segementation fault on powerpc64le. -// Remove the xfail when the problem is fixed. -// XFAIL: powerpc64le #include "xray/xray_interface.h" diff --git a/test/xray/TestCases/Linux/custom-event-logging.cc b/test/xray/TestCases/Linux/custom-event-logging.cc new file mode 100644 index 000000000000..b1a766d46045 --- /dev/null +++ b/test/xray/TestCases/Linux/custom-event-logging.cc @@ -0,0 +1,40 @@ +// Use the clang feature for custom xray event logging. 
+// +// RUN: %clangxx_xray -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s +// FIXME: Support this in non-x86_64 as well +// REQUIRES: x86_64-linux +// REQUIRES: built-in-llvm-tree +#include +#include "xray/xray_interface.h" + +[[clang::xray_always_instrument]] void foo() { + static constexpr char CustomLogged[] = "hello custom logging!"; + printf("before calling the custom logging...\n"); + __xray_customevent(CustomLogged, sizeof(CustomLogged)); + printf("after calling the custom logging...\n"); +} + +void myprinter(void* ptr, size_t size) { + printf("%.*s\n", static_cast(size), static_cast(ptr)); +} + +int main() { + foo(); + // CHECK: before calling the custom logging... + // CHECK-NEXT: after calling the custom logging... + printf("setting up custom event handler...\n"); + // CHECK-NEXT: setting up custom event handler... + __xray_set_customevent_handler(myprinter); + __xray_patch(); + // CHECK-NEXT: before calling the custom logging... + foo(); + // CHECK-NEXT: hello custom logging! + // CHECK-NEXT: after calling the custom logging... + printf("removing custom event handler...\n"); + // CHECK-NEXT: removing custom event handler... + __xray_set_customevent_handler(nullptr); + foo(); + // CHECK-NEXT: before calling the custom logging... + // CHECK-NEXT: after calling the custom logging... +} diff --git a/test/xray/TestCases/Linux/func-id-utils.cc b/test/xray/TestCases/Linux/func-id-utils.cc index 82ba34d30acc..c9a2952c695d 100644 --- a/test/xray/TestCases/Linux/func-id-utils.cc +++ b/test/xray/TestCases/Linux/func-id-utils.cc @@ -3,8 +3,6 @@ // // RUN: %clangxx_xray -std=c++11 %s -o %t // RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false" %run %t -// FIXME: When we know why this fails in ppc, un-xfail it. 
-// XFAIL: powerpc64le #include "xray/xray_interface.h" #include @@ -32,13 +30,21 @@ assert(all_instrumented.size() == __xray_max_function_id() && "each function id must be assigned to a unique function"); - std::set common; - std::set_intersection(all_instrumented.begin(), all_instrumented.end(), - must_be_instrumented.begin(), - must_be_instrumented.end(), - std::inserter(common, common.begin())); + std::set not_instrumented; + const auto comp = [](void *lhs, void *rhs) { +#ifdef __PPC__ + return reinterpret_cast(lhs) + 8 < + reinterpret_cast(rhs); +#else + return lhs < rhs; +#endif + }; + std::set_difference(must_be_instrumented.begin(), must_be_instrumented.end(), + all_instrumented.begin(), all_instrumented.end(), + std::inserter(not_instrumented, not_instrumented.begin()), + comp); assert( - common == must_be_instrumented && + not_instrumented.empty() && "we should see all explicitly instrumented functions with function ids"); - return common == must_be_instrumented ? 0 : 1; + return not_instrumented.empty() ? 0 : 1; } -- cgit v1.2.3