Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp')
-rw-r--r-- | contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp | 1350
1 file changed, 657 insertions, 693 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index a21da9c81c6f..fd9441dfcb53 100644
--- a/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -16,6 +16,7 @@
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_file.h"
+#include "sanitizer_common/sanitizer_interface_internal.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
@@ -28,29 +29,28 @@
 #include "tsan_symbolize.h"
 #include "ubsan/ubsan_init.h"

-#ifdef __SSE3__
-// <emmintrin.h> transitively includes <stdlib.h>,
-// and it's prohibited to include std headers into tsan runtime.
-// So we do this dirty trick.
-#define _MM_MALLOC_H_INCLUDED
-#define __MM_MALLOC_H
-#include <emmintrin.h>
-typedef __m128i m128;
-#endif
-
 volatile int __tsan_resumed = 0;

 extern "C" void __tsan_resume() {
   __tsan_resumed = 1;
 }

+SANITIZER_WEAK_DEFAULT_IMPL
+void __tsan_test_only_on_fork() {}
+
 namespace __tsan {

-#if !SANITIZER_GO && !SANITIZER_MAC
+#if !SANITIZER_GO
+void (*on_initialize)(void);
+int (*on_finalize)(int);
+#endif
+
+#if !SANITIZER_GO && !SANITIZER_APPLE
 __attribute__((tls_model("initial-exec")))
-THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
+THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(
+    SANITIZER_CACHE_LINE_SIZE);
 #endif
-static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
+static char ctx_placeholder[sizeof(Context)] ALIGNED(SANITIZER_CACHE_LINE_SIZE);
 Context *ctx;

 // Can be overriden by a front-end.
@@ -58,113 +58,404 @@ Context *ctx;
 bool OnFinalize(bool failed);
 void OnInitialize();
 #else
-#include <dlfcn.h>
 SANITIZER_WEAK_CXX_DEFAULT_IMPL
 bool OnFinalize(bool failed) {
-#if !SANITIZER_GO
-  if (auto *ptr = dlsym(RTLD_DEFAULT, "__tsan_on_finalize"))
-    return reinterpret_cast<decltype(&__tsan_on_finalize)>(ptr)(failed);
-#endif
+#  if !SANITIZER_GO
+  if (on_finalize)
+    return on_finalize(failed);
+#  endif
   return failed;
 }
+
 SANITIZER_WEAK_CXX_DEFAULT_IMPL
 void OnInitialize() {
-#if !SANITIZER_GO
-  if (auto *ptr = dlsym(RTLD_DEFAULT, "__tsan_on_initialize")) {
-    return reinterpret_cast<decltype(&__tsan_on_initialize)>(ptr)();
-  }
+#  if !SANITIZER_GO
+  if (on_initialize)
+    on_initialize();
+#  endif
+}
 #endif
+
+static TracePart* TracePartAlloc(ThreadState* thr) {
+  TracePart* part = nullptr;
+  {
+    Lock lock(&ctx->slot_mtx);
+    uptr max_parts = Trace::kMinParts + flags()->history_size;
+    Trace* trace = &thr->tctx->trace;
+    if (trace->parts_allocated == max_parts ||
+        ctx->trace_part_finished_excess) {
+      part = ctx->trace_part_recycle.PopFront();
+      DPrintf("#%d: TracePartAlloc: part=%p\n", thr->tid, part);
+      if (part && part->trace) {
+        Trace* trace1 = part->trace;
+        Lock trace_lock(&trace1->mtx);
+        part->trace = nullptr;
+        TracePart* part1 = trace1->parts.PopFront();
+        CHECK_EQ(part, part1);
+        if (trace1->parts_allocated > trace1->parts.Size()) {
+          ctx->trace_part_finished_excess +=
+              trace1->parts_allocated - trace1->parts.Size();
+          trace1->parts_allocated = trace1->parts.Size();
+        }
+      }
+    }
+    if (trace->parts_allocated < max_parts) {
+      trace->parts_allocated++;
+      if (ctx->trace_part_finished_excess)
+        ctx->trace_part_finished_excess--;
+    }
+    if (!part)
+      ctx->trace_part_total_allocated++;
+    else if (ctx->trace_part_recycle_finished)
+      ctx->trace_part_recycle_finished--;
+  }
+  if (!part)
+    part = new (MmapOrDie(sizeof(*part), "TracePart")) TracePart();
+  return part;
+}
+
+static void TracePartFree(TracePart* part) SANITIZER_REQUIRES(ctx->slot_mtx) {
+  DCHECK(part->trace);
+  part->trace = nullptr;
+  ctx->trace_part_recycle.PushFront(part);
+}
+
+void TraceResetForTesting() {
+  Lock lock(&ctx->slot_mtx);
+  while (auto* part = ctx->trace_part_recycle.PopFront()) {
+    if (auto trace = part->trace)
+      CHECK_EQ(trace->parts.PopFront(), part);
+    UnmapOrDie(part, sizeof(*part));
+  }
+  ctx->trace_part_total_allocated = 0;
+  ctx->trace_part_recycle_finished = 0;
+  ctx->trace_part_finished_excess = 0;
+}
+
+static void DoResetImpl(uptr epoch) {
+  ThreadRegistryLock lock0(&ctx->thread_registry);
+  Lock lock1(&ctx->slot_mtx);
+  CHECK_EQ(ctx->global_epoch, epoch);
+  ctx->global_epoch++;
+  CHECK(!ctx->resetting);
+  ctx->resetting = true;
+  for (u32 i = ctx->thread_registry.NumThreadsLocked(); i--;) {
+    ThreadContext* tctx = (ThreadContext*)ctx->thread_registry.GetThreadLocked(
+        static_cast<Tid>(i));
+    // Potentially we could purge all ThreadStatusDead threads from the
+    // registry. Since we reset all shadow, they can't race with anything
+    // anymore. However, their tid's can still be stored in some aux places
+    // (e.g. tid of thread that created something).
+    auto trace = &tctx->trace;
+    Lock lock(&trace->mtx);
+    bool attached = tctx->thr && tctx->thr->slot;
+    auto parts = &trace->parts;
+    bool local = false;
+    while (!parts->Empty()) {
+      auto part = parts->Front();
+      local = local || part == trace->local_head;
+      if (local)
+        CHECK(!ctx->trace_part_recycle.Queued(part));
+      else
+        ctx->trace_part_recycle.Remove(part);
+      if (attached && parts->Size() == 1) {
+        // The thread is running and this is the last/current part.
+        // Set the trace position to the end of the current part
+        // to force the thread to call SwitchTracePart and re-attach
+        // to a new slot and allocate a new trace part.
+        // Note: the thread is concurrently modifying the position as well,
+        // so this is only best-effort. The thread can only modify position
+        // within this part, because switching parts is protected by
+        // slot/trace mutexes that we hold here.
+        atomic_store_relaxed(
+            &tctx->thr->trace_pos,
+            reinterpret_cast<uptr>(&part->events[TracePart::kSize]));
+        break;
+      }
+      parts->Remove(part);
+      TracePartFree(part);
+    }
+    CHECK_LE(parts->Size(), 1);
+    trace->local_head = parts->Front();
+    if (tctx->thr && !tctx->thr->slot) {
+      atomic_store_relaxed(&tctx->thr->trace_pos, 0);
+      tctx->thr->trace_prev_pc = 0;
+    }
+    if (trace->parts_allocated > trace->parts.Size()) {
+      ctx->trace_part_finished_excess +=
+          trace->parts_allocated - trace->parts.Size();
+      trace->parts_allocated = trace->parts.Size();
+    }
+  }
+  while (ctx->slot_queue.PopFront()) {
+  }
+  for (auto& slot : ctx->slots) {
+    slot.SetEpoch(kEpochZero);
+    slot.journal.Reset();
+    slot.thr = nullptr;
+    ctx->slot_queue.PushBack(&slot);
+  }
+
+  DPrintf("Resetting shadow...\n");
+  auto shadow_begin = ShadowBeg();
+  auto shadow_end = ShadowEnd();
+#if SANITIZER_GO
+  CHECK_NE(0, ctx->mapped_shadow_begin);
+  shadow_begin = ctx->mapped_shadow_begin;
+  shadow_end = ctx->mapped_shadow_end;
+  VPrintf(2, "shadow_begin-shadow_end: (0x%zx-0x%zx)\n",
+          shadow_begin, shadow_end);
+#endif
-static ALIGNED(64) char thread_registry_placeholder[sizeof(ThreadRegistry)];
-
-static ThreadContextBase *CreateThreadContext(u32 tid) {
-  // Map thread trace when context is created.
-  char name[50];
-  internal_snprintf(name, sizeof(name), "trace %u", tid);
-  MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name);
-  const uptr hdr = GetThreadTraceHeader(tid);
-  internal_snprintf(name, sizeof(name), "trace header %u", tid);
-  MapThreadTrace(hdr, sizeof(Trace), name);
-  new((void*)hdr) Trace();
-  // We are going to use only a small part of the trace with the default
-  // value of history_size. However, the constructor writes to the whole trace.
-  // Release the unused part.
-  uptr hdr_end = hdr + sizeof(Trace);
-  hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts());
-  hdr_end = RoundUp(hdr_end, GetPageSizeCached());
-  if (hdr_end < hdr + sizeof(Trace)) {
-    ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace));
-    uptr unused = hdr + sizeof(Trace) - hdr_end;
-    if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) {
-      Report("ThreadSanitizer: failed to mprotect(%p, %p)\n",
-             hdr_end, unused);
-      CHECK("unable to mprotect" && 0);
+#if SANITIZER_WINDOWS
+  auto resetFailed =
+      !ZeroMmapFixedRegion(shadow_begin, shadow_end - shadow_begin);
+#else
+  auto resetFailed =
+      !MmapFixedSuperNoReserve(shadow_begin, shadow_end-shadow_begin, "shadow");
+#  if !SANITIZER_GO
+  DontDumpShadow(shadow_begin, shadow_end - shadow_begin);
+#  endif
+#endif
+  if (resetFailed) {
+    Printf("failed to reset shadow memory\n");
+    Die();
+  }
+  DPrintf("Resetting meta shadow...\n");
+  ctx->metamap.ResetClocks();
+  StoreShadow(&ctx->last_spurious_race, Shadow::kEmpty);
+  ctx->resetting = false;
+}
+
+// Clang does not understand locking all slots in the loop:
+// error: expecting mutex 'slot.mtx' to be held at start of each loop
+void DoReset(ThreadState* thr, uptr epoch) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  for (auto& slot : ctx->slots) {
+    slot.mtx.Lock();
+    if (UNLIKELY(epoch == 0))
+      epoch = ctx->global_epoch;
+    if (UNLIKELY(epoch != ctx->global_epoch)) {
+      // Epoch can't change once we've locked the first slot.
+      CHECK_EQ(slot.sid, 0);
+      slot.mtx.Unlock();
+      return;
+    }
+  }
+  DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch);
+  DoResetImpl(epoch);
+  for (auto& slot : ctx->slots) slot.mtx.Unlock();
+}
+
+void FlushShadowMemory() { DoReset(nullptr, 0); }
+
+static TidSlot* FindSlotAndLock(ThreadState* thr)
+    SANITIZER_ACQUIRE(thr->slot->mtx) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(!thr->slot);
+  TidSlot* slot = nullptr;
+  for (;;) {
+    uptr epoch;
+    {
+      Lock lock(&ctx->slot_mtx);
+      epoch = ctx->global_epoch;
+      if (slot) {
+        // This is an exhausted slot from the previous iteration.
+        if (ctx->slot_queue.Queued(slot))
+          ctx->slot_queue.Remove(slot);
+        thr->slot_locked = false;
+        slot->mtx.Unlock();
+      }
+      for (;;) {
+        slot = ctx->slot_queue.PopFront();
+        if (!slot)
+          break;
+        if (slot->epoch() != kEpochLast) {
+          ctx->slot_queue.PushBack(slot);
+          break;
+        }
+      }
+    }
+    if (!slot) {
+      DoReset(thr, epoch);
+      continue;
     }
+    slot->mtx.Lock();
+    CHECK(!thr->slot_locked);
+    thr->slot_locked = true;
+    if (slot->thr) {
+      DPrintf("#%d: preempting sid=%d tid=%d\n", thr->tid, (u32)slot->sid,
+              slot->thr->tid);
+      slot->SetEpoch(slot->thr->fast_state.epoch());
+      slot->thr = nullptr;
+    }
+    if (slot->epoch() != kEpochLast)
+      return slot;
   }
-  void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext));
-  return new(mem) ThreadContext(tid);
 }

+void SlotAttachAndLock(ThreadState* thr) {
+  TidSlot* slot = FindSlotAndLock(thr);
+  DPrintf("#%d: SlotAttach: slot=%u\n", thr->tid, static_cast<int>(slot->sid));
+  CHECK(!slot->thr);
+  CHECK(!thr->slot);
+  slot->thr = thr;
+  thr->slot = slot;
+  Epoch epoch = EpochInc(slot->epoch());
+  CHECK(!EpochOverflow(epoch));
+  slot->SetEpoch(epoch);
+  thr->fast_state.SetSid(slot->sid);
+  thr->fast_state.SetEpoch(epoch);
+  if (thr->slot_epoch != ctx->global_epoch) {
+    thr->slot_epoch = ctx->global_epoch;
+    thr->clock.Reset();
 #if !SANITIZER_GO
-static const u32 kThreadQuarantineSize = 16;
-#else
-static const u32 kThreadQuarantineSize = 64;
+    thr->last_sleep_stack_id = kInvalidStackID;
+    thr->last_sleep_clock.Reset();
+#endif
+  }
+  thr->clock.Set(slot->sid, epoch);
+  slot->journal.PushBack({thr->tid, epoch});
+}
+
+static void SlotDetachImpl(ThreadState* thr, bool exiting) {
+  TidSlot* slot = thr->slot;
+  thr->slot = nullptr;
+  if (thr != slot->thr) {
+    slot = nullptr;  // we don't own the slot anymore
+    if (thr->slot_epoch != ctx->global_epoch) {
+      TracePart* part = nullptr;
+      auto* trace = &thr->tctx->trace;
+      {
+        Lock l(&trace->mtx);
+        auto* parts = &trace->parts;
+        // The trace can be completely empty in an unlikely event
+        // the thread is preempted right after it acquired the slot
+        // in ThreadStart and did not trace any events yet.
+        CHECK_LE(parts->Size(), 1);
+        part = parts->PopFront();
+        thr->tctx->trace.local_head = nullptr;
+        atomic_store_relaxed(&thr->trace_pos, 0);
+        thr->trace_prev_pc = 0;
+      }
+      if (part) {
+        Lock l(&ctx->slot_mtx);
+        TracePartFree(part);
+      }
+    }
+    return;
+  }
+  CHECK(exiting || thr->fast_state.epoch() == kEpochLast);
+  slot->SetEpoch(thr->fast_state.epoch());
+  slot->thr = nullptr;
+}
+
+void SlotDetach(ThreadState* thr) {
+  Lock lock(&thr->slot->mtx);
+  SlotDetachImpl(thr, true);
+}
+
+void SlotLock(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(!thr->slot_locked);
+#if SANITIZER_DEBUG
+  // Check these mutexes are not locked.
+  // We can call DoReset from SlotAttachAndLock, which will lock
+  // these mutexes, but it happens only every once in a while.
+  { ThreadRegistryLock lock(&ctx->thread_registry); }
+  { Lock lock(&ctx->slot_mtx); }
 #endif
+  TidSlot* slot = thr->slot;
+  slot->mtx.Lock();
+  thr->slot_locked = true;
+  if (LIKELY(thr == slot->thr && thr->fast_state.epoch() != kEpochLast))
+    return;
+  SlotDetachImpl(thr, false);
+  thr->slot_locked = false;
+  slot->mtx.Unlock();
+  SlotAttachAndLock(thr);
+}
+
+void SlotUnlock(ThreadState* thr) {
+  DCHECK(thr->slot_locked);
+  thr->slot_locked = false;
+  thr->slot->mtx.Unlock();
+}

 Context::Context()
     : initialized(),
       report_mtx(MutexTypeReport),
       nreported(),
-      nmissed_expected(),
-      thread_registry(new (thread_registry_placeholder) ThreadRegistry(
-          CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse)),
+      thread_registry([](Tid tid) -> ThreadContextBase* {
+        return new (Alloc(sizeof(ThreadContext))) ThreadContext(tid);
+      }),
       racy_mtx(MutexTypeRacy),
       racy_stacks(),
-      racy_addresses(),
       fired_suppressions_mtx(MutexTypeFired),
-      clock_alloc(LINKER_INITIALIZED, "clock allocator") {
+      slot_mtx(MutexTypeSlots),
+      resetting() {
   fired_suppressions.reserve(8);
+  for (uptr i = 0; i < ARRAY_SIZE(slots); i++) {
+    TidSlot* slot = &slots[i];
+    slot->sid = static_cast<Sid>(i);
+    slot_queue.PushBack(slot);
+  }
+  global_epoch = 1;
 }

+TidSlot::TidSlot() : mtx(MutexTypeSlot) {}
+
 // The objects are allocated in TLS, so one may rely on zero-initialization.
-ThreadState::ThreadState(Context *ctx, u32 tid, int unique_id, u64 epoch,
-                         unsigned reuse_count, uptr stk_addr, uptr stk_size,
-                         uptr tls_addr, uptr tls_size)
-    : fast_state(tid, epoch)
-      // Do not touch these, rely on zero initialization,
-      // they may be accessed before the ctor.
-      // , ignore_reads_and_writes()
-      // , ignore_interceptors()
-      ,
-      clock(tid, reuse_count)
-#if !SANITIZER_GO
-      ,
-      jmp_bufs()
-#endif
-      ,
-      tid(tid),
-      unique_id(unique_id),
-      stk_addr(stk_addr),
-      stk_size(stk_size),
-      tls_addr(tls_addr),
-      tls_size(tls_size)
+ThreadState::ThreadState(Tid tid)
+    // Do not touch these, rely on zero initialization,
+    // they may be accessed before the ctor.
+    // ignore_reads_and_writes()
+    // ignore_interceptors()
+    : tid(tid) {
+  CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
 #if !SANITIZER_GO
-      ,
-      last_sleep_clock(tid)
+  // C/C++ uses fixed size shadow stack.
+  const int kInitStackSize = kShadowStackSize;
+  shadow_stack = static_cast<uptr*>(
+      MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
+  SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
+                              kInitStackSize * sizeof(uptr));
+#else
+  // Go uses malloc-allocated shadow stack with dynamic size.
+  const int kInitStackSize = 8;
+  shadow_stack = static_cast<uptr*>(Alloc(kInitStackSize * sizeof(uptr)));
 #endif
-{
+  shadow_stack_pos = shadow_stack;
+  shadow_stack_end = shadow_stack + kInitStackSize;
 }

 #if !SANITIZER_GO
-static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
-  uptr n_threads;
-  uptr n_running_threads;
-  ctx->thread_registry->GetNumberOfThreads(&n_threads, &n_running_threads);
+void MemoryProfiler(u64 uptime) {
+  if (ctx->memprof_fd == kInvalidFd)
+    return;
   InternalMmapVector<char> buf(4096);
-  WriteMemoryProfile(buf.data(), buf.size(), n_threads, n_running_threads);
-  WriteToFile(fd, buf.data(), internal_strlen(buf.data()));
+  WriteMemoryProfile(buf.data(), buf.size(), uptime);
+  WriteToFile(ctx->memprof_fd, buf.data(), internal_strlen(buf.data()));
+}
+
+static bool InitializeMemoryProfiler() {
+  ctx->memprof_fd = kInvalidFd;
+  const char *fname = flags()->profile_memory;
+  if (!fname || !fname[0])
+    return false;
+  if (internal_strcmp(fname, "stdout") == 0) {
+    ctx->memprof_fd = 1;
+  } else if (internal_strcmp(fname, "stderr") == 0) {
+    ctx->memprof_fd = 2;
+  } else {
+    InternalScopedString filename;
+    filename.AppendF("%s.%d", fname, (int)internal_getpid());
+    ctx->memprof_fd = OpenFile(filename.data(), WrOnly);
+    if (ctx->memprof_fd == kInvalidFd) {
+      Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
+             filename.data());
+      return false;
+    }
+  }
+  MemoryProfiler(0);
+  return true;
 }

 static void *BackgroundThread(void *arg) {
@@ -172,64 +463,43 @@ static void *BackgroundThread(void *arg) {
   // We don't use ScopedIgnoreInterceptors, because we want ignores to be
   // enabled even when the thread function exits (e.g. during pthread thread
   // shutdown code).
-  cur_thread_init();
-  cur_thread()->ignore_interceptors++;
+  cur_thread_init()->ignore_interceptors++;
   const u64 kMs2Ns = 1000 * 1000;
+  const u64 start = NanoTime();

-  fd_t mprof_fd = kInvalidFd;
-  if (flags()->profile_memory && flags()->profile_memory[0]) {
-    if (internal_strcmp(flags()->profile_memory, "stdout") == 0) {
-      mprof_fd = 1;
-    } else if (internal_strcmp(flags()->profile_memory, "stderr") == 0) {
-      mprof_fd = 2;
-    } else {
-      InternalScopedString filename;
-      filename.append("%s.%d", flags()->profile_memory, (int)internal_getpid());
-      fd_t fd = OpenFile(filename.data(), WrOnly);
-      if (fd == kInvalidFd) {
-        Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
-               filename.data());
-      } else {
-        mprof_fd = fd;
-      }
-    }
-  }
-
-  u64 last_flush = NanoTime();
+  u64 last_flush = start;
   uptr last_rss = 0;
-  for (int i = 0;
-      atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
-      i++) {
+  while (!atomic_load_relaxed(&ctx->stop_background_thread)) {
     SleepForMillis(100);
     u64 now = NanoTime();

     // Flush memory if requested.
     if (flags()->flush_memory_ms > 0) {
       if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
-        VPrintf(1, "ThreadSanitizer: periodic memory flush\n");
+        VReport(1, "ThreadSanitizer: periodic memory flush\n");
         FlushShadowMemory();
-        last_flush = NanoTime();
+        now = last_flush = NanoTime();
       }
     }
-    // GetRSS can be expensive on huge programs, so don't do it every 100ms.
     if (flags()->memory_limit_mb > 0) {
       uptr rss = GetRSS();
       uptr limit = uptr(flags()->memory_limit_mb) << 20;
-      VPrintf(1, "ThreadSanitizer: memory flush check"
-                 " RSS=%llu LAST=%llu LIMIT=%llu\n",
+      VReport(1,
+              "ThreadSanitizer: memory flush check"
+              " RSS=%llu LAST=%llu LIMIT=%llu\n",
              (u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20);
       if (2 * rss > limit + last_rss) {
-        VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n");
+        VReport(1, "ThreadSanitizer: flushing memory due to RSS\n");
         FlushShadowMemory();
         rss = GetRSS();
-        VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
+        now = NanoTime();
+        VReport(1, "ThreadSanitizer: memory flushed RSS=%llu\n",
+                (u64)rss >> 20);
       }
       last_rss = rss;
     }

-    // Write memory profile if requested.
-    if (mprof_fd != kInvalidFd)
-      MemoryProfiler(ctx, mprof_fd, i);
+    MemoryProfiler(now - start);

     // Flush symbolizer cache if requested.
     if (flags()->flush_symbolizer_ms > 0) {
@@ -260,31 +530,96 @@ static void StopBackgroundThread() {
 #endif

 void DontNeedShadowFor(uptr addr, uptr size) {
-  ReleaseMemoryPagesToOS(MemToShadow(addr), MemToShadow(addr + size));
+  ReleaseMemoryPagesToOS(reinterpret_cast<uptr>(MemToShadow(addr)),
+                         reinterpret_cast<uptr>(MemToShadow(addr + size)));
 }

 #if !SANITIZER_GO
+// We call UnmapShadow before the actual munmap, at that point we don't yet
+// know if the provided address/size are sane. We can't call UnmapShadow
+// after the actual munmap becuase at that point the memory range can
+// already be reused for something else, so we can't rely on the munmap
+// return value to understand is the values are sane.
+// While calling munmap with insane values (non-canonical address, negative
+// size, etc) is an error, the kernel won't crash. We must also try to not
+// crash as the failure mode is very confusing (paging fault inside of the
+// runtime on some derived shadow address).
+static bool IsValidMmapRange(uptr addr, uptr size) {
+  if (size == 0)
+    return true;
+  if (static_cast<sptr>(size) < 0)
+    return false;
+  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
+    return false;
+  // Check that if the start of the region belongs to one of app ranges,
+  // end of the region belongs to the same region.
+  const uptr ranges[][2] = {
+      {LoAppMemBeg(), LoAppMemEnd()},
+      {MidAppMemBeg(), MidAppMemEnd()},
+      {HiAppMemBeg(), HiAppMemEnd()},
+  };
+  for (auto range : ranges) {
+    if (addr >= range[0] && addr < range[1])
+      return addr + size <= range[1];
+  }
+  return false;
+}
+
 void UnmapShadow(ThreadState *thr, uptr addr, uptr size) {
-  if (size == 0) return;
+  if (size == 0 || !IsValidMmapRange(addr, size))
+    return;
   DontNeedShadowFor(addr, size);
   ScopedGlobalProcessor sgp;
-  ctx->metamap.ResetRange(thr->proc(), addr, size);
+  SlotLocker locker(thr, true);
+  ctx->metamap.ResetRange(thr->proc(), addr, size, true);
 }
 #endif

 void MapShadow(uptr addr, uptr size) {
+  // Ensure thead registry lock held, so as to synchronize
+  // with DoReset, which also access the mapped_shadow_* ctxt fields.
+  ThreadRegistryLock lock0(&ctx->thread_registry);
+  static bool data_mapped = false;
+
+#if !SANITIZER_GO
   // Global data is not 64K aligned, but there are no adjacent mappings,
   // so we can get away with unaligned mapping.
   // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
   const uptr kPageSize = GetPageSizeCached();
   uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), kPageSize);
   uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), kPageSize);
-  if (!MmapFixedSuperNoReserve(shadow_begin, shadow_end - shadow_begin,
-                               "shadow"))
+  if (!MmapFixedNoReserve(shadow_begin, shadow_end - shadow_begin, "shadow"))
     Die();
+#else
+  uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), (64 << 10));
+  uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), (64 << 10));
+  VPrintf(2, "MapShadow for (0x%zx-0x%zx), begin/end: (0x%zx-0x%zx)\n",
+          addr, addr + size, shadow_begin, shadow_end);
+
+  if (!data_mapped) {
+    // First call maps data+bss.
+    if (!MmapFixedSuperNoReserve(shadow_begin, shadow_end - shadow_begin, "shadow"))
+      Die();
+  } else {
+    VPrintf(2, "ctx->mapped_shadow_{begin,end} = (0x%zx-0x%zx)\n",
+            ctx->mapped_shadow_begin, ctx->mapped_shadow_end);
+    // Second and subsequent calls map heap.
+    if (shadow_end <= ctx->mapped_shadow_end)
+      return;
+    if (!ctx->mapped_shadow_begin || ctx->mapped_shadow_begin > shadow_begin)
+      ctx->mapped_shadow_begin = shadow_begin;
+    if (shadow_begin < ctx->mapped_shadow_end)
+      shadow_begin = ctx->mapped_shadow_end;
+    VPrintf(2, "MapShadow begin/end = (0x%zx-0x%zx)\n",
+            shadow_begin, shadow_end);
+    if (!MmapFixedSuperNoReserve(shadow_begin, shadow_end - shadow_begin,
+                                 "shadow"))
+      Die();
+    ctx->mapped_shadow_end = shadow_end;
+  }
+#endif

   // Meta shadow is 2:1, so tread carefully.
-  static bool data_mapped = false;
   static uptr mapped_meta_end = 0;
   uptr meta_begin = (uptr)MemToMeta(addr);
   uptr meta_end = (uptr)MemToMeta(addr + size);
@@ -297,12 +632,11 @@ void MapShadow(uptr addr, uptr size) {
                                  "meta shadow"))
       Die();
   } else {
-    // Mapping continous heap.
+    // Mapping continuous heap.
     // Windows wants 64K alignment.
     meta_begin = RoundDownTo(meta_begin, 64 << 10);
     meta_end = RoundUpTo(meta_end, 64 << 10);
-    if (meta_end <= mapped_meta_end)
-      return;
+    CHECK_GT(meta_end, mapped_meta_end);
     if (meta_begin < mapped_meta_end)
       meta_begin = mapped_meta_end;
     if (!MmapFixedSuperNoReserve(meta_begin, meta_end - meta_begin,
@@ -310,56 +644,8 @@ void MapShadow(uptr addr, uptr size) {
       Die();
     mapped_meta_end = meta_end;
   }
-  VPrintf(2, "mapped meta shadow for (%p-%p) at (%p-%p)\n",
-      addr, addr+size, meta_begin, meta_end);
-}
-
-void MapThreadTrace(uptr addr, uptr size, const char *name) {
-  DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
-  CHECK_GE(addr, TraceMemBeg());
-  CHECK_LE(addr + size, TraceMemEnd());
-  CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
-  if (!MmapFixedSuperNoReserve(addr, size, name)) {
-    Printf("FATAL: ThreadSanitizer can not mmap thread trace (%p/%p)\n",
-        addr, size);
-    Die();
-  }
-}
-
-static void CheckShadowMapping() {
-  uptr beg, end;
-  for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
-    // Skip cases for empty regions (heap definition for architectures that
-    // do not use 64-bit allocator).
-    if (beg == end)
-      continue;
-    VPrintf(3, "checking shadow region %p-%p\n", beg, end);
-    uptr prev = 0;
-    for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
-      for (int x = -(int)kShadowCell; x <= (int)kShadowCell; x += kShadowCell) {
-        const uptr p = RoundDown(p0 + x, kShadowCell);
-        if (p < beg || p >= end)
-          continue;
-        const uptr s = MemToShadow(p);
-        const uptr m = (uptr)MemToMeta(p);
-        VPrintf(3, "  checking pointer %p: shadow=%p meta=%p\n", p, s, m);
-        CHECK(IsAppMem(p));
-        CHECK(IsShadowMem(s));
-        CHECK_EQ(p, ShadowToMem(s));
-        CHECK(IsMetaMem(m));
-        if (prev) {
-          // Ensure that shadow and meta mappings are linear within a single
-          // user range. Lots of code that processes memory ranges assumes it.
-          const uptr prev_s = MemToShadow(prev);
-          const uptr prev_m = (uptr)MemToMeta(prev);
-          CHECK_EQ(s - prev_s, (p - prev) * kShadowMultiplier);
-          CHECK_EQ((m - prev_m) / kMetaShadowSize,
-                   (p - prev) / kMetaShadowCell);
-        }
-        prev = p;
-      }
-    }
-  }
+  VPrintf(2, "mapped meta shadow for (0x%zx-0x%zx) at (0x%zx-0x%zx)\n", addr,
+          addr + size, meta_begin, meta_end);
 }

 #if !SANITIZER_GO
@@ -380,15 +666,19 @@ void CheckUnwind() {
   // since we are going to die soon.
   ScopedIgnoreInterceptors ignore;
 #if !SANITIZER_GO
-  cur_thread()->ignore_sync++;
-  cur_thread()->ignore_reads_and_writes++;
+  ThreadState* thr = cur_thread();
+  thr->nomalloc = false;
+  thr->ignore_sync++;
+  thr->ignore_reads_and_writes++;
+  atomic_store_relaxed(&thr->in_signal_handler, 0);
 #endif
   PrintCurrentStackSlow(StackTrace::GetCurrentPc());
 }

+bool is_initialized;
+
 void Initialize(ThreadState *thr) {
   // Thread safe because done before all threads exist.
-  static bool is_initialized = false;
   if (is_initialized)
     return;
   is_initialized = true;
@@ -409,9 +699,6 @@ void Initialize(ThreadState *thr) {
   __tsan::InitializePlatformEarly();

 #if !SANITIZER_GO
-  // Re-exec ourselves if we need to set additional env or command line args.
-  MaybeReexec();
-
   InitializeAllocator();
   ReplaceSystemMalloc();
 #endif
@@ -420,7 +707,6 @@ void Initialize(ThreadState *thr) {
   Processor *proc = ProcCreate();
   ProcWire(proc, thr);
   InitializeInterceptors();
-  CheckShadowMapping();
   InitializePlatform();
   InitializeDynamicAnnotations();
 #if !SANITIZER_GO
@@ -436,21 +722,23 @@ void Initialize(ThreadState *thr) {
   Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
 #endif

-  VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
+  VPrintf(1, "***** Running under ThreadSanitizer v3 (pid %d) *****\n",
           (int)internal_getpid());

   // Initialize thread 0.
-  int tid = ThreadCreate(thr, 0, 0, true);
-  CHECK_EQ(tid, 0);
+  Tid tid = ThreadCreate(nullptr, 0, 0, true);
+  CHECK_EQ(tid, kMainTid);
   ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
 #if TSAN_CONTAINS_UBSAN
   __ubsan::InitAsPlugin();
 #endif
-  ctx->initialized = true;

 #if !SANITIZER_GO
   Symbolizer::LateInitialize();
+  if (InitializeMemoryProfiler() || flags()->force_background_thread)
+    MaybeSpawnBackgroundThread();
 #endif
+  ctx->initialized = true;

   if (flags()->stop_on_start) {
     Printf("ThreadSanitizer is suspended at startup (pid %d)."
@@ -476,20 +764,21 @@ void MaybeSpawnBackgroundThread() {
 #endif
 }

-
 int Finalize(ThreadState *thr) {
   bool failed = false;

+#if !SANITIZER_GO
   if (common_flags()->print_module_map == 1)
     DumpProcessMap();
+#endif

   if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
-    SleepForMillis(flags()->atexit_sleep_ms);
+    internal_usleep(u64(flags()->atexit_sleep_ms) * 1000);

-  // Wait for pending reports.
-  ctx->report_mtx.Lock();
-  { ScopedErrorReportLock l; }
-  ctx->report_mtx.Unlock();
+  {
+    // Wait for pending reports.
+    ScopedErrorReportLock lock;
+  }

 #if !SANITIZER_GO
   if (Verbosity()) AllocatorPrintStats();
@@ -506,18 +795,8 @@ int Finalize(ThreadState *thr) {
 #endif
   }

-  if (ctx->nmissed_expected) {
-    failed = true;
-    Printf("ThreadSanitizer: missed %d expected races\n",
-        ctx->nmissed_expected);
-  }
-
   if (common_flags()->print_suppressions)
     PrintMatchedSuppressions();
-#if !SANITIZER_GO
-  if (flags()->print_benign)
-    PrintMatchedBenignRaces();
-#endif

   failed = OnFinalize(failed);

@@ -525,10 +804,16 @@ int Finalize(ThreadState *thr) {
 }

 #if !SANITIZER_GO
-void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
-  ctx->thread_registry->Lock();
-  ctx->report_mtx.Lock();
+void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  GlobalProcessorLock();
+  // Detaching from the slot makes OnUserFree skip writing to the shadow.
+  // The slot will be locked so any attempts to use it will deadlock anyway.
+  SlotDetach(thr);
+  for (auto& slot : ctx->slots) slot.mtx.Lock();
+  ctx->thread_registry.Lock();
+  ctx->slot_mtx.Lock();
   ScopedErrorReportLock::Lock();
+  AllocatorLock();
   // Suppress all reports in the pthread_atfork callbacks.
   // Reports will deadlock on the report_mtx.
   // We could ignore sync operations as well,
@@ -537,36 +822,48 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
   thr->suppress_reports++;
   // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
   // we'll assert in CheckNoLocks() unless we ignore interceptors.
+  // On OS X libSystem_atfork_prepare/parent/child callbacks are called
+  // after/before our callbacks and they call free.
   thr->ignore_interceptors++;
+  // Disables memory write in OnUserAlloc/Free.
+  thr->ignore_reads_and_writes++;
+
+  __tsan_test_only_on_fork();
 }

-void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
+static void ForkAfter(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   thr->suppress_reports--;  // Enabled in ForkBefore.
   thr->ignore_interceptors--;
+  thr->ignore_reads_and_writes--;
+  AllocatorUnlock();
   ScopedErrorReportLock::Unlock();
-  ctx->report_mtx.Unlock();
-  ctx->thread_registry->Unlock();
+  ctx->slot_mtx.Unlock();
+  ctx->thread_registry.Unlock();
+  for (auto& slot : ctx->slots) slot.mtx.Unlock();
+  SlotAttachAndLock(thr);
+  SlotUnlock(thr);
+  GlobalProcessorUnlock();
 }

-void ForkChildAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
-  thr->suppress_reports--;  // Enabled in ForkBefore.
-  thr->ignore_interceptors--;
-  ScopedErrorReportLock::Unlock();
-  ctx->report_mtx.Unlock();
-  ctx->thread_registry->Unlock();
+void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); }

-  uptr nthread = 0;
-  ctx->thread_registry->GetNumberOfThreads(0, 0, &nthread /* alive threads */);
-  VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
-      " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
+void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
+  ForkAfter(thr);
+  u32 nthread = ctx->thread_registry.OnFork(thr->tid);
+  VPrintf(1,
+          "ThreadSanitizer: forked new process with pid %d,"
+          " parent had %d threads\n",
+          (int)internal_getpid(), (int)nthread);
   if (nthread == 1) {
-    StartBackgroundThread();
+    if (start_thread)
+      StartBackgroundThread();
   } else {
     // We've just forked a multi-threaded process. We cannot reasonably function
     // after that (some mutexes may be locked before fork). So just enable
     // ignores for everything in the hope that we will exec soon.
     ctx->after_multithreaded_fork = true;
     thr->ignore_interceptors++;
+    thr->suppress_reports++;
     ThreadIgnoreBegin(thr, pc);
     ThreadIgnoreSyncBegin(thr, pc);
   }
@@ -578,19 +875,20 @@
 NOINLINE
 void GrowShadowStack(ThreadState *thr) {
   const int sz = thr->shadow_stack_end - thr->shadow_stack;
   const int newsz = 2 * sz;
-  uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
-      newsz * sizeof(uptr));
+  auto *newstack = (uptr *)Alloc(newsz * sizeof(uptr));
   internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
-  internal_free(thr->shadow_stack);
+  Free(thr->shadow_stack);
   thr->shadow_stack = newstack;
   thr->shadow_stack_pos = newstack + sz;
   thr->shadow_stack_end = newstack + newsz;
 }
 #endif

-u32 CurrentStackId(ThreadState *thr, uptr pc) {
+StackID CurrentStackId(ThreadState *thr, uptr pc) {
+#if !SANITIZER_GO
   if (!thr->is_inited)  // May happen during bootstrap.
-    return 0;
+    return kInvalidStackID;
+#endif
   if (pc != 0) {
 #if !SANITIZER_GO
     DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -601,486 +899,149 @@ u32 CurrentStackId(ThreadState *thr, uptr pc) {
     thr->shadow_stack_pos[0] = pc;
     thr->shadow_stack_pos++;
   }
-  u32 id = StackDepotPut(
+  StackID id = StackDepotPut(
       StackTrace(thr->shadow_stack, thr->shadow_stack_pos - thr->shadow_stack));
   if (pc != 0)
     thr->shadow_stack_pos--;
   return id;
 }

-void TraceSwitch(ThreadState *thr) {
-#if !SANITIZER_GO
-  if (ctx->after_multithreaded_fork)
-    return;
-#endif
-  thr->nomalloc++;
-  Trace *thr_trace = ThreadTrace(thr->tid);
-  Lock l(&thr_trace->mtx);
-  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
-  TraceHeader *hdr = &thr_trace->headers[trace];
-  hdr->epoch0 = thr->fast_state.epoch();
-  ObtainCurrentStack(thr, 0, &hdr->stack0);
-  hdr->mset0 = thr->mset;
-  thr->nomalloc--;
-}
-
-Trace *ThreadTrace(int tid) {
-  return (Trace*)GetThreadTraceHeader(tid);
-}
-
-uptr TraceTopPC(ThreadState *thr) {
-  Event *events = (Event*)GetThreadTrace(thr->tid);
-  uptr pc = events[thr->fast_state.GetTracePos()];
-  return pc;
-}
-
-uptr TraceSize() {
-  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
-}
-
-uptr TraceParts() {
-  return TraceSize() / kTracePartSize;
-}
-
-#if !SANITIZER_GO
-extern "C" void __tsan_trace_switch() {
-  TraceSwitch(cur_thread());
-}
-
-extern "C" void __tsan_report_race() {
-  ReportRace(cur_thread());
-}
-#endif
-
-ALWAYS_INLINE
-Shadow LoadShadow(u64 *p) {
-  u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
-  return Shadow(raw);
-}
-
-ALWAYS_INLINE
-void StoreShadow(u64 *sp, u64 s) {
-  atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
-}
-
-ALWAYS_INLINE
-void StoreIfNotYetStored(u64 *sp, u64 *s) {
-  StoreShadow(sp, *s);
-  *s = 0;
-}
-
-ALWAYS_INLINE
-void HandleRace(ThreadState *thr, u64 *shadow_mem,
-                Shadow cur, Shadow old) {
-  thr->racy_state[0] = cur.raw();
-  thr->racy_state[1] = old.raw();
-  thr->racy_shadow_addr = shadow_mem;
-#if !SANITIZER_GO
-  HACKY_CALL(__tsan_report_race);
-#else
-  ReportRace(thr);
-#endif
-}
-
-static inline bool HappensBefore(Shadow old, ThreadState *thr) {
-  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
-}
-
-ALWAYS_INLINE
-void MemoryAccessImpl1(ThreadState *thr, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
-    u64 *shadow_mem, Shadow cur) {
-
-  // This potentially can live in an MMX/SSE scratch register.
-  // The required intrinsics are:
-  // __m128i _mm_move_epi64(__m128i*);
-  // _mm_storel_epi64(u64*, __m128i);
-  u64 store_word = cur.raw();
-  bool stored = false;
-
-  // scan all the shadow values and dispatch to 4 categories:
-  // same, replace, candidate and race (see comments below).
-  // we consider only 3 cases regarding access sizes:
-  // equal, intersect and not intersect. initially I considered
-  // larger and smaller as well, it allowed to replace some
-  // 'candidates' with 'same' or 'replace', but I think
-  // it's just not worth it (performance- and complexity-wise).
-
-  Shadow old(0);
-
-  // It release mode we manually unroll the loop,
-  // because empirically gcc generates better code this way.
-  // However, we can't afford unrolling in debug mode, because the function
-  // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
-  // threads, which is not enough for the unrolled loop.
-#if SANITIZER_DEBUG
-  for (int idx = 0; idx < 4; idx++) {
-#include "tsan_update_shadow_word_inl.h"
-  }
-#else
-  int idx = 0;
-#include "tsan_update_shadow_word_inl.h"
-  idx = 1;
-  if (stored) {
-#include "tsan_update_shadow_word_inl.h"
-  } else {
-#include "tsan_update_shadow_word_inl.h"
-  }
-  idx = 2;
-  if (stored) {
-#include "tsan_update_shadow_word_inl.h"
-  } else {
-#include "tsan_update_shadow_word_inl.h"
-  }
-  idx = 3;
-  if (stored) {
-#include "tsan_update_shadow_word_inl.h"
-  } else {
-#include "tsan_update_shadow_word_inl.h"
-  }
-#endif
-
-  // we did not find any races and had already stored
-  // the current access info, so we are done
-  if (LIKELY(stored))
-    return;
-  // choose a random candidate slot and replace it
-  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
-  return;
- RACE:
-  HandleRace(thr, shadow_mem, cur, old);
-  return;
-}
-
-void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
-    int size, bool kAccessIsWrite, bool kIsAtomic) {
-  while (size) {
-    int size1 = 1;
-    int kAccessSizeLog = kSizeLog1;
-    if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
-      size1 = 8;
-      kAccessSizeLog = kSizeLog8;
-    } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
-      size1 = 4;
-      kAccessSizeLog = kSizeLog4;
-    } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
-      size1 = 2;
-      kAccessSizeLog = kSizeLog2;
-    }
-    MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
-    addr += size1;
-    size -= size1;
-  }
-}
-
-ALWAYS_INLINE
-bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-  Shadow cur(a);
-  for (uptr i = 0; i < kShadowCnt; i++) {
-    Shadow old(LoadShadow(&s[i]));
-    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
-        old.TidWithIgnore() == cur.TidWithIgnore() &&
-        old.epoch() > sync_epoch &&
-        old.IsAtomic() == cur.IsAtomic() &&
-        old.IsRead() <= cur.IsRead())
-      return true;
+static bool TraceSkipGap(ThreadState* thr) {
+  Trace *trace = &thr->tctx->trace;
+  Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
+  DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0);
+  auto *part = trace->parts.Back();
+  DPrintf("#%d: TraceSwitchPart enter trace=%p parts=%p-%p pos=%p\n", thr->tid,
+          trace, trace->parts.Front(), part, pos);
+  if (!part)
+    return false;
+  // We can get here when we still have space in the current trace part.
+  // The fast-path check in TraceAcquire has false positives in the middle of
+  // the part. Check if we are indeed at the end of the current part or not,
+  // and fill any gaps with NopEvent's.
+  Event* end = &part->events[TracePart::kSize];
+  DCHECK_GE(pos, &part->events[0]);
+  DCHECK_LE(pos, end);
+  if (pos + 1 < end) {
+    if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
+        TracePart::kAlignment)
+      *pos++ = NopEvent;
+    *pos++ = NopEvent;
+    DCHECK_LE(pos + 2, end);
+    atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
+    return true;
   }
+  // We are indeed at the end.
+  for (; pos < end; pos++) *pos = NopEvent;
   return false;
 }

-#if defined(__SSE3__)
-#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
-    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
-    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
-ALWAYS_INLINE
-bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-  // This is an optimized version of ContainsSameAccessSlow.
-  // load current access into access[0:63]
-  const m128 access = _mm_cvtsi64_si128(a);
-  // duplicate high part of access in addr0:
-  // addr0[0:31] = access[32:63]
-  // addr0[32:63] = access[32:63]
-  // addr0[64:95] = access[32:63]
-  // addr0[96:127] = access[32:63]
-  const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
-  // load 4 shadow slots
-  const m128 shadow0 = _mm_load_si128((__m128i*)s);
-  const m128 shadow1 = _mm_load_si128((__m128i*)s + 1);
-  // load high parts of 4 shadow slots into addr_vect:
-  // addr_vect[0:31] = shadow0[32:63]
-  // addr_vect[32:63] = shadow0[96:127]
-  // addr_vect[64:95] = shadow1[32:63]
-  // addr_vect[96:127] = shadow1[96:127]
-  m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
-  if (!is_write) {
-    // set IsRead bit in addr_vect
-    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
-    const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
-    addr_vect = _mm_or_si128(addr_vect, rw_mask);
-  }
-  // addr0 == addr_vect?
-  const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
-  // epoch1[0:63] = sync_epoch
-  const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
-  // epoch[0:31] = sync_epoch[0:31]
-  // epoch[32:63] = sync_epoch[0:31]
-  // epoch[64:95] = sync_epoch[0:31]
-  // epoch[96:127] = sync_epoch[0:31]
-  const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
-  // load low parts of shadow cell epochs into epoch_vect:
-  // epoch_vect[0:31] = shadow0[0:31]
-  // epoch_vect[32:63] = shadow0[64:95]
-  // epoch_vect[64:95] = shadow1[0:31]
-  // epoch_vect[96:127] = shadow1[64:95]
-  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
-  // epoch_vect >= sync_epoch?
-  const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
-  // addr_res & epoch_res
-  const m128 res = _mm_and_si128(addr_res, epoch_res);
-  // mask[0] = res[7]
-  // mask[1] = res[15]
-  // ...
-  // mask[15] = res[127]
-  const int mask = _mm_movemask_epi8(res);
-  return mask != 0;
-}
-#endif
-
-ALWAYS_INLINE
-bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-#if defined(__SSE3__)
-  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
-  // NOTE: this check can fail if the shadow is concurrently mutated
-  // by other threads. But it still can be useful if you modify
-  // ContainsSameAccessFast and want to ensure that it's not completely broken.
-  // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
-  return res;
-#else
-  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
-#endif
-}
-
-ALWAYS_INLINE USED
-void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
-  u64 *shadow_mem = (u64*)MemToShadow(addr);
-  DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
-      " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
-      (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
-      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
-      (uptr)shadow_mem[0], (uptr)shadow_mem[1],
-      (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
-#if SANITIZER_DEBUG
-  if (!IsAppMem(addr)) {
-    Printf("Access to non app mem %zx\n", addr);
-    DCHECK(IsAppMem(addr));
-  }
-  if (!IsShadowMem((uptr)shadow_mem)) {
-    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
-    DCHECK(IsShadowMem((uptr)shadow_mem));
-  }
-#endif
-
-  if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
-    // Access to .rodata section, no races here.
-    // Measurements show that it can be 10-20% of all memory accesses.
-    return;
-  }
-
-  FastState fast_state = thr->fast_state;
-  if (UNLIKELY(fast_state.GetIgnoreBit())) {
-    return;
-  }
-
-  Shadow cur(fast_state);
-  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
-  cur.SetWrite(kAccessIsWrite);
-  cur.SetAtomic(kIsAtomic);
-
-  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
-      thr->fast_synch_epoch, kAccessIsWrite))) {
-    return;
-  }
-
-  if (kCollectHistory) {
-    fast_state.IncrementEpoch();
-    thr->fast_state = fast_state;
-    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
-    cur.IncrementEpoch();
-  }
-
-  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
-      shadow_mem, cur);
-}
-
-// Called by MemoryAccessRange in tsan_rtl_thread.cpp
-ALWAYS_INLINE USED
-void MemoryAccessImpl(ThreadState *thr, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
-    u64 *shadow_mem, Shadow cur) {
-  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
-      thr->fast_synch_epoch, kAccessIsWrite))) {
-    return;
-  }
-
-  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
-      shadow_mem, cur);
-}
-
-static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
-                           u64 val) {
-  (void)thr;
-  (void)pc;
-  if (size == 0)
+NOINLINE
+void TraceSwitchPart(ThreadState* thr) {
+  if (TraceSkipGap(thr))
     return;
-  // FIXME: fix me.
-  uptr offset = addr % kShadowCell;
-  if (offset) {
-    offset = kShadowCell - offset;
-    if (size <= offset)
+#if !SANITIZER_GO
+  if (ctx->after_multithreaded_fork) {
+    // We just need to survive till exec.
+    TracePart* part = thr->tctx->trace.parts.Back();
+    if (part) {
+      atomic_store_relaxed(&thr->trace_pos,
+                           reinterpret_cast<uptr>(&part->events[0]));
       return;
-    addr += offset;
-    size -= offset;
-  }
-  DCHECK_EQ(addr % 8, 0);
-  // If a user passes some insane arguments (memset(0)),
-  // let it just crash as usual.
-  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
-    return;
-  // Don't want to touch lots of shadow memory.
-  // If a program maps 10MB stack, there is no need reset the whole range.
-  size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
-  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
-  if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
-    u64 *p = (u64*)MemToShadow(addr);
-    CHECK(IsShadowMem((uptr)p));
-    CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
-    // FIXME: may overwrite a part outside the region
-    for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
-      p[i++] = val;
-      for (uptr j = 1; j < kShadowCnt; j++)
-        p[i++] = 0;
-    }
-  } else {
-    // The region is big, reset only beginning and end.
-    const uptr kPageSize = GetPageSizeCached();
-    u64 *begin = (u64*)MemToShadow(addr);
-    u64 *end = begin + size / kShadowCell * kShadowCnt;
-    u64 *p = begin;
-    // Set at least first kPageSize/2 to page boundary.
-    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
-      *p++ = val;
-      for (uptr j = 1; j < kShadowCnt; j++)
-        *p++ = 0;
-    }
-    // Reset middle part.
-    u64 *p1 = p;
-    p = RoundDown(end, kPageSize);
-    if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
-      Die();
-    // Set the ending.
-    while (p < end) {
-      *p++ = val;
-      for (uptr j = 1; j < kShadowCnt; j++)
-        *p++ = 0;
     }
   }
+#endif
+  TraceSwitchPartImpl(thr);
 }

-void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
-  MemoryRangeSet(thr, pc, addr, size, 0);
-}
-
-void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
-  // Processing more than 1k (4k of shadow) is expensive,
-  // can cause excessive memory consumption (user does not necessary touch
-  // the whole range) and most likely unnecessary.
-  if (size > 1024)
-    size = 1024;
-  CHECK_EQ(thr->is_freeing, false);
-  thr->is_freeing = true;
-  MemoryAccessRange(thr, pc, addr, size, true);
-  thr->is_freeing = false;
-  if (kCollectHistory) {
-    thr->fast_state.IncrementEpoch();
-    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+void TraceSwitchPartImpl(ThreadState* thr) {
+  SlotLocker locker(thr, true);
+  Trace* trace = &thr->tctx->trace;
+  TracePart* part = TracePartAlloc(thr);
+  part->trace = trace;
+  thr->trace_prev_pc = 0;
+  TracePart* recycle = nullptr;
+  // Keep roughly half of parts local to the thread
+  // (not queued into the recycle queue).
+  uptr local_parts = (Trace::kMinParts + flags()->history_size + 1) / 2;
+  {
+    Lock lock(&trace->mtx);
+    if (trace->parts.Empty())
+      trace->local_head = part;
+    if (trace->parts.Size() >= local_parts) {
+      recycle = trace->local_head;
+      trace->local_head = trace->parts.Next(recycle);
+    }
+    trace->parts.PushBack(part);
+    atomic_store_relaxed(&thr->trace_pos,
+                         reinterpret_cast<uptr>(&part->events[0]));
   }
-  Shadow s(thr->fast_state);
-  s.ClearIgnoreBit();
-  s.MarkAsFreed();
-  s.SetWrite(true);
-  s.SetAddr0AndSizeLog(0, 3);
-  MemoryRangeSet(thr, pc, addr, size, s.raw());
-}
-
-void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
-  if (kCollectHistory) {
-    thr->fast_state.IncrementEpoch();
-    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+  // Make this part self-sufficient by restoring the current stack
+  // and mutex set in the beginning of the trace.
+  TraceTime(thr);
+  {
+    // Pathologically large stacks may not fit into the part.
+    // In these cases we log only fixed number of top frames.
+    const uptr kMaxFrames = 1000;
+    // Check that kMaxFrames won't consume the whole part.
+    static_assert(kMaxFrames < TracePart::kSize / 2, "kMaxFrames is too big");
+    uptr* pos = Max(&thr->shadow_stack[0], thr->shadow_stack_pos - kMaxFrames);
+    for (; pos < thr->shadow_stack_pos; pos++) {
+      if (TryTraceFunc(thr, *pos))
+        continue;
+      CHECK(TraceSkipGap(thr));
+      CHECK(TryTraceFunc(thr, *pos));
+    }
   }
-  Shadow s(thr->fast_state);
-  s.ClearIgnoreBit();
-  s.SetWrite(true);
-  s.SetAddr0AndSizeLog(0, 3);
-  MemoryRangeSet(thr, pc, addr, size, s.raw());
-}
-
-void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
-                                         uptr size) {
-  if (thr->ignore_reads_and_writes == 0)
-    MemoryRangeImitateWrite(thr, pc, addr, size);
-  else
-    MemoryResetRange(thr, pc, addr, size);
-}
-
-ALWAYS_INLINE USED
-void FuncEntry(ThreadState *thr, uptr pc) {
-  DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
-  if (kCollectHistory) {
-    thr->fast_state.IncrementEpoch();
-    TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
+  for (uptr i = 0; i < thr->mset.Size(); i++) {
+    MutexSet::Desc d = thr->mset.Get(i);
+    for (uptr i = 0; i < d.count; i++)
+      TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
+                     d.addr, d.stack_id);
   }
-
-  // Shadow stack maintenance can be replaced with
-  // stack unwinding during trace switch (which presumably must be faster).
-  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
-#if !SANITIZER_GO
-  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
-#else
-  if (thr->shadow_stack_pos == thr->shadow_stack_end)
-    GrowShadowStack(thr);
-#endif
-  thr->shadow_stack_pos[0] = pc;
-  thr->shadow_stack_pos++;
-}
-
-ALWAYS_INLINE USED
-void FuncExit(ThreadState *thr) {
-  DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
-  if (kCollectHistory) {
-    thr->fast_state.IncrementEpoch();
-    TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
+  // Callers of TraceSwitchPart expect that TraceAcquire will always succeed
+  // after the call. It's possible that TryTraceFunc/TraceMutexLock above
+  // filled the trace part exactly up to the TracePart::kAlignment gap
+  // and the next TraceAcquire won't succeed. Skip the gap to avoid that.
+  EventFunc *ev;
+  if (!TraceAcquire(thr, &ev)) {
+    CHECK(TraceSkipGap(thr));
+    CHECK(TraceAcquire(thr, &ev));
   }
-
-  DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
-#if !SANITIZER_GO
-  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
-#endif
-  thr->shadow_stack_pos--;
+  {
+    Lock lock(&ctx->slot_mtx);
+    // There is a small chance that the slot may be not queued at this point.
+    // This can happen if the slot has kEpochLast epoch and another thread
+    // in FindSlotAndLock discovered that it's exhausted and removed it from
+    // the slot queue. kEpochLast can happen in 2 cases: (1) if TraceSwitchPart
+    // was called with the slot locked and epoch already at kEpochLast,
+    // or (2) if we've acquired a new slot in SlotLock in the beginning
+    // of the function and the slot was at kEpochLast - 1, so after increment
+    // in SlotAttachAndLock it become kEpochLast.
+    if (ctx->slot_queue.Queued(thr->slot)) {
+      ctx->slot_queue.Remove(thr->slot);
+      ctx->slot_queue.PushBack(thr->slot);
+    }
+    if (recycle)
+      ctx->trace_part_recycle.PushBack(recycle);
+  }
+  DPrintf("#%d: TraceSwitchPart exit parts=%p-%p pos=0x%zx\n", thr->tid,
+          trace->parts.Front(), trace->parts.Back(),
+          atomic_load_relaxed(&thr->trace_pos));
 }

-void ThreadIgnoreBegin(ThreadState *thr, uptr pc, bool save_stack) {
+void ThreadIgnoreBegin(ThreadState* thr, uptr pc) {
   DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
   thr->ignore_reads_and_writes++;
   CHECK_GT(thr->ignore_reads_and_writes, 0);
   thr->fast_state.SetIgnoreBit();
 #if !SANITIZER_GO
-  if (save_stack && !ctx->after_multithreaded_fork)
+  if (pc && !ctx->after_multithreaded_fork)
     thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
 #endif
 }

-void ThreadIgnoreEnd(ThreadState *thr, uptr pc) {
+void ThreadIgnoreEnd(ThreadState *thr) {
   DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
   CHECK_GT(thr->ignore_reads_and_writes, 0);
   thr->ignore_reads_and_writes--;
@@ -1100,17 +1061,17 @@ uptr __tsan_testonly_shadow_stack_current_size() {
 }
 #endif

-void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc, bool save_stack) {
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
   DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
   thr->ignore_sync++;
   CHECK_GT(thr->ignore_sync, 0);
 #if !SANITIZER_GO
-  if (save_stack && !ctx->after_multithreaded_fork)
+  if (pc && !ctx->after_multithreaded_fork)
     thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
 #endif
 }

-void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc) {
+void ThreadIgnoreSyncEnd(ThreadState *thr) {
   DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
   CHECK_GT(thr->ignore_sync, 0);
   thr->ignore_sync--;
@@ -1129,7 +1090,6 @@ void build_consistency_debug() {}
 #else
 void build_consistency_release() {}
 #endif
-
 }  // namespace __tsan

 #if SANITIZER_CHECK_DEADLOCKS
@@ -1137,23 +1097,27 @@ namespace __sanitizer {
 using namespace __tsan;
 MutexMeta mutex_meta[] = {
     {MutexInvalid, "Invalid", {}},
-    {MutexThreadRegistry, "ThreadRegistry", {}},
-    {MutexTypeTrace, "Trace", {MutexLeaf}},
-    {MutexTypeReport, "Report", {MutexTypeSyncVar}},
-    {MutexTypeSyncVar, "SyncVar", {}},
+    {MutexThreadRegistry,
+     "ThreadRegistry",
+     {MutexTypeSlots, MutexTypeTrace, MutexTypeReport}},
+    {MutexTypeReport, "Report", {MutexTypeTrace}},
+    {MutexTypeSyncVar, "SyncVar", {MutexTypeReport, MutexTypeTrace}},
     {MutexTypeAnnotations, "Annotations", {}},
-    {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}},
+    {MutexTypeAtExit, "AtExit", {}},
     {MutexTypeFired, "Fired", {MutexLeaf}},
     {MutexTypeRacy, "Racy", {MutexLeaf}},
-    {MutexTypeGlobalProc, "GlobalProc", {}},
+    {MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}},
+    {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
+    {MutexTypeTrace, "Trace", {}},
+    {MutexTypeSlot,
+     "Slot",
+     {MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry,
+      MutexTypeSlots}},
+    {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}},
     {},
 };

 void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
-}  // namespace __sanitizer
-#endif

-#if !SANITIZER_GO
-// Must be included in this file to make sure everything is inlined.
-# include "tsan_interface_inl.h"
+}  // namespace __sanitizer
 #endif
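
A note on the dlfcn change above: the weak OnInitialize()/OnFinalize() defaults no longer probe dlsym(RTLD_DEFAULT, ...) for __tsan_on_initialize/__tsan_on_finalize; a front-end is now expected to assign the __tsan::on_initialize/on_finalize function pointers that this patch introduces. A minimal sketch of how an embedding tool could use them; the hook bodies and the constructor-based registration are illustrative assumptions, not part of the patch:

// Pointer declarations matching the ones this patch adds to the runtime.
namespace __tsan {
extern void (*on_initialize)(void);
extern int (*on_finalize)(int);
}

static void MyToolOnInit() {
  // Runs once at the end of tsan's Initialize().
}

static int MyToolOnFinalize(int failed) {
  // Returning non-zero forces a failing exit status, mirroring OnFinalize().
  return failed;
}

// Hypothetical registration point; it has to run before __tsan_init fires,
// e.g. from an early constructor in the front-end.
__attribute__((constructor)) static void RegisterTsanHooks() {
  __tsan::on_initialize = MyToolOnInit;
  __tsan::on_finalize = MyToolOnFinalize;
}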
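Similarly, the __tsan_test_only_on_fork() hook near the top of the diff uses the sanitizer weak-default idiom: the runtime ships an empty weak definition and calls the hook from ForkBefore(), so a test binary can substitute a strong definition at link time. Roughly, assuming SANITIZER_WEAK_DEFAULT_IMPL expands to a weak extern "C" definition (the exact macro lives in sanitizer_common):

// Runtime side: weak no-op default, invoked at the end of ForkBefore().
extern "C" __attribute__((weak)) void __tsan_test_only_on_fork() {}

// Test side: a strong definition silently replaces the weak one at link time.
extern "C" void __tsan_test_only_on_fork() {
  // e.g. acquire and release extra locks to exercise the atfork ordering
}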