src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-07-19 07:02:40 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-07-19 07:02:40 +0000
commit	462d72ec21756f5ca1002f128f014d36a845f3d2 (patch)
tree	b3d61e87f3a1b79e57d9639497994df822687bf0 /lib/tsan
parent	1992b790c2c12b7850bdf86662b67302052ec2fe (diff)
download	src-462d72ec21756f5ca1002f128f014d36a845f3d2.tar.gz src-462d72ec21756f5ca1002f128f014d36a845f3d2.zip

Vendor import of compiler-rt trunk r308421:vendor/compiler-rt/compiler-rt-trunk-r308421 vendor/compiler-rt/compiler-rt-release_50-r311219 vendor/compiler-rt/compiler-rt-release_50-r310316 vendor/compiler-rt/compiler-rt-release_50-r309439

https://llvm.org/svn/llvm-project/compiler-rt/trunk@308421

Notes

Notes: svn path=/vendor/compiler-rt/dist/; revision=321188 svn path=/vendor/compiler-rt/compiler-rt-release_50-r311219/; revision=322731; tag=vendor/compiler-rt/compiler-rt-release_50-r311219

Diffstat (limited to 'lib/tsan')

-rw-r--r--

lib/tsan/rtl/tsan_clock.cc

436

-rw-r--r--

lib/tsan/rtl/tsan_clock.h

207

-rw-r--r--

lib/tsan/rtl/tsan_defs.h

-rw-r--r--

lib/tsan/rtl/tsan_mman.cc

-rw-r--r--

lib/tsan/rtl/tsan_platform_linux.cc

-rw-r--r--

lib/tsan/tests/unit/tsan_clock_test.cc

6 files changed, 514 insertions, 198 deletions

diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 9ee9104283f8..ef984a45cd9d 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc

@@ -61,20 +61,13 @@

// an exclusive lock; ThreadClock's are private to respective threads and so

// do not need any protection.

-// Description of ThreadClock state:

-// clk_ - fixed size vector clock.

-// nclk_ - effective size of the vector clock (the rest is zeros).

-// tid_ - index of the thread associated with he clock ("current thread").

-// last_acquire_ - current thread time when it acquired something from

-// other threads.

-//

// Description of SyncClock state:

// clk_ - variable size vector clock, low kClkBits hold timestamp,

// the remaining bits hold "acquired" flag (the actual value is thread's

// reused counter);

// if acquried == thr->reused_, then the respective thread has already

-// acquired this clock (except possibly dirty_tids_).

-// dirty_tids_ - holds up to two indeces in the vector clock that other threads

+// acquired this clock (except possibly for dirty elements).

+// dirty_ - holds up to two indices in the vector clock that other threads

// need to acquire regardless of "acquired" flag value;

// release_store_tid_ - denotes that the clock state is a result of

// release-store operation by the thread with release_store_tid_ index.

@@ -90,21 +83,51 @@

namespace __tsan {

+static atomic_uint32_t *ref_ptr(ClockBlock *cb) {

+ return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);

+// Drop reference to the first level block idx.

+static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {

+ ClockBlock *cb = ctx->clock_alloc.Map(idx);

+ atomic_uint32_t *ref = ref_ptr(cb);

+ u32 v = atomic_load(ref, memory_order_acquire);

+ for (;;) {

+ CHECK_GT(v, 0);

+ if (v == 1)

+ break;

+ if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))

+ return;

+ }

+ // First level block owns second level blocks, so free them as well.

+ for (uptr i = 0; i < blocks; i++)

+ ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);

+ ctx->clock_alloc.Free(c, idx);

ThreadClock::ThreadClock(unsigned tid, unsigned reused)

: tid_(tid)

- , reused_(reused + 1) { // 0 has special meaning

+ , reused_(reused + 1) // 0 has special meaning

+ , cached_idx_()

+ , cached_size_()

+ , cached_blocks_() {

CHECK_LT(tid, kMaxTidInClock);

CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);

nclk_ = tid_ + 1;

last_acquire_ = 0;

internal_memset(clk_, 0, sizeof(clk_));

- clk_[tid_].reused = reused_;

}

void ThreadClock::ResetCached(ClockCache *c) {

+ if (cached_idx_) {

+ UnrefClockBlock(c, cached_idx_, cached_blocks_);

+ cached_idx_ = 0;

+ cached_size_ = 0;

+ cached_blocks_ = 0;

+ }

}

-void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {

+void ThreadClock::acquire(ClockCache *c, SyncClock *src) {

DCHECK_LE(nclk_, kMaxTid);

DCHECK_LE(src->size_, kMaxTid);

CPP_STAT_INC(StatClockAcquire);

@@ -116,50 +139,46 @@ void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {

return;

}

- // Check if we've already acquired src after the last release operation on src

bool acquired = false;

- if (nclk > tid_) {

- if (src->elem(tid_).reused == reused_) {

- for (unsigned i = 0; i < kDirtyTids; i++) {

- unsigned tid = src->dirty_tids_[i];

- if (tid != kInvalidTid) {

- u64 epoch = src->elem(tid).epoch;

- if (clk_[tid].epoch < epoch) {

- clk_[tid].epoch = epoch;

- acquired = true;

- }

- if (acquired) {

- CPP_STAT_INC(StatClockAcquiredSomething);

- last_acquire_ = clk_[tid_].epoch;

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ SyncClock::Dirty dirty = src->dirty_[i];

+ unsigned tid = dirty.tid;

+ if (tid != kInvalidTid) {

+ if (clk_[tid] < dirty.epoch) {

+ clk_[tid] = dirty.epoch;

+ acquired = true;

}

- return;

}

- // O(N) acquire.

- CPP_STAT_INC(StatClockAcquireFull);

- nclk_ = max(nclk_, nclk);

- for (uptr i = 0; i < nclk; i++) {

- u64 epoch = src->elem(i).epoch;

- if (clk_[i].epoch < epoch) {

- clk_[i].epoch = epoch;

- acquired = true;

+ // Check if we've already acquired src after the last release operation on src

+ if (tid_ >= nclk || src->elem(tid_).reused != reused_) {

+ // O(N) acquire.

+ CPP_STAT_INC(StatClockAcquireFull);

+ nclk_ = max(nclk_, nclk);

+ u64 *dst_pos = &clk_[0];

+ for (ClockElem &src_elem : *src) {

+ u64 epoch = src_elem.epoch;

+ if (*dst_pos < epoch) {

+ *dst_pos = epoch;

+ acquired = true;

+ }

+ dst_pos++;

}

- }

- // Remember that this thread has acquired this clock.

- if (nclk > tid_)

- src->elem(tid_).reused = reused_;

+ // Remember that this thread has acquired this clock.

+ if (nclk > tid_)

+ src->elem(tid_).reused = reused_;

+ }

if (acquired) {

CPP_STAT_INC(StatClockAcquiredSomething);

- last_acquire_ = clk_[tid_].epoch;

+ last_acquire_ = clk_[tid_];

+ ResetCached(c);

}

-void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

+void ThreadClock::release(ClockCache *c, SyncClock *dst) {

DCHECK_LE(nclk_, kMaxTid);

DCHECK_LE(dst->size_, kMaxTid);

@@ -179,7 +198,7 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

// since the last release on dst. If so, we need to update

// only dst->elem(tid_).

if (dst->elem(tid_).epoch > last_acquire_) {

- UpdateCurrentThread(dst);

+ UpdateCurrentThread(c, dst);

if (dst->release_store_tid_ != tid_ ||

dst->release_store_reused_ != reused_)

dst->release_store_tid_ = kInvalidTid;

@@ -188,23 +207,24 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

// O(N) release.

CPP_STAT_INC(StatClockReleaseFull);

+ dst->Unshare(c);

// First, remember whether we've acquired dst.

bool acquired = IsAlreadyAcquired(dst);

if (acquired)

CPP_STAT_INC(StatClockReleaseAcquired);

// Update dst->clk_.

- for (uptr i = 0; i < nclk_; i++) {

- ClockElem &ce = dst->elem(i);

- ce.epoch = max(ce.epoch, clk_[i].epoch);

+ dst->FlushDirty();

+ uptr i = 0;

+ for (ClockElem &ce : *dst) {

+ ce.epoch = max(ce.epoch, clk_[i]);

ce.reused = 0;

+ i++;

}

// Clear 'acquired' flag in the remaining elements.

if (nclk_ < dst->size_)

CPP_STAT_INC(StatClockReleaseClearTail);

for (uptr i = nclk_; i < dst->size_; i++)

dst->elem(i).reused = 0;

- for (unsigned i = 0; i < kDirtyTids; i++)

- dst->dirty_tids_[i] = kInvalidTid;

dst->release_store_tid_ = kInvalidTid;

dst->release_store_reused_ = 0;

// If we've acquired dst, remember this fact,

@@ -213,11 +233,37 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

dst->elem(tid_).reused = reused_;

}

-void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {

+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {

DCHECK_LE(nclk_, kMaxTid);

DCHECK_LE(dst->size_, kMaxTid);

CPP_STAT_INC(StatClockStore);

+ if (dst->size_ == 0 && cached_idx_ != 0) {

+ // Reuse the cached clock.

+ // Note: we could reuse/cache the cached clock in more cases:

+ // we could update the existing clock and cache it, or replace it with the

+ // currently cached clock and release the old one. And for a shared

+ // existing clock, we could replace it with the currently cached;

+ // or unshare, update and cache. But, for simplicity, we currently reuse

+ // cached clock only when the target clock is empty.

+ dst->tab_ = ctx->clock_alloc.Map(cached_idx_);

+ dst->tab_idx_ = cached_idx_;

+ dst->size_ = cached_size_;

+ dst->blocks_ = cached_blocks_;

+ CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);

+ // The cached clock is shared (immutable),

+ // so this is where we store the current clock.

+ dst->dirty_[0].tid = tid_;

+ dst->dirty_[0].epoch = clk_[tid_];

+ dst->release_store_tid_ = tid_;

+ dst->release_store_reused_ = reused_;

+ // Remember that we don't need to acquire it in future.

+ dst->elem(tid_).reused = reused_;

+ // Grab a reference.

+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);

+ return;

+ }

// Check if we need to resize dst.

if (dst->size_ < nclk_)

dst->Resize(c, nclk_);

@@ -226,32 +272,41 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {

dst->release_store_reused_ == reused_ &&

dst->elem(tid_).epoch > last_acquire_) {

CPP_STAT_INC(StatClockStoreFast);

- UpdateCurrentThread(dst);

+ UpdateCurrentThread(c, dst);

return;

}

// O(N) release-store.

CPP_STAT_INC(StatClockStoreFull);

- for (uptr i = 0; i < nclk_; i++) {

- ClockElem &ce = dst->elem(i);

- ce.epoch = clk_[i].epoch;

+ dst->Unshare(c);

+ // Note: dst can be larger than this ThreadClock.

+ // This is fine since clk_ beyond size is all zeros.

+ uptr i = 0;

+ for (ClockElem &ce : *dst) {

+ ce.epoch = clk_[i];

ce.reused = 0;

+ i++;

}

- // Clear the tail of dst->clk_.

- if (nclk_ < dst->size_) {

- for (uptr i = nclk_; i < dst->size_; i++) {

- ClockElem &ce = dst->elem(i);

- ce.epoch = 0;

- ce.reused = 0;

- }

- CPP_STAT_INC(StatClockStoreTail);

- }

- for (unsigned i = 0; i < kDirtyTids; i++)

- dst->dirty_tids_[i] = kInvalidTid;

+ for (uptr i = 0; i < kDirtyTids; i++)

+ dst->dirty_[i].tid = kInvalidTid;

dst->release_store_tid_ = tid_;

dst->release_store_reused_ = reused_;

// Rememeber that we don't need to acquire it in future.

dst->elem(tid_).reused = reused_;

+ // If the resulting clock is cachable, cache it for future release operations.

+ // The clock is always cachable if we released to an empty sync object.

+ if (cached_idx_ == 0 && dst->Cachable()) {

+ // Grab a reference to the ClockBlock.

+ atomic_uint32_t *ref = ref_ptr(dst->tab_);

+ if (atomic_load(ref, memory_order_acquire) == 1)

+ atomic_store_relaxed(ref, 2);

+ else

+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);

+ cached_idx_ = dst->tab_idx_;

+ cached_size_ = dst->size_;

+ cached_blocks_ = dst->blocks_;

+ }

}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {

@@ -261,37 +316,36 @@ void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {

}

// Updates only single element related to the current thread in dst->clk_.

-void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {

+void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {

// Update the threads time, but preserve 'acquired' flag.

- dst->elem(tid_).epoch = clk_[tid_].epoch;

for (unsigned i = 0; i < kDirtyTids; i++) {

- if (dst->dirty_tids_[i] == tid_) {

+ SyncClock::Dirty *dirty = &dst->dirty_[i];

+ const unsigned tid = dirty->tid;

+ if (tid == tid_ || tid == kInvalidTid) {

CPP_STAT_INC(StatClockReleaseFast);

- return;

- }

- if (dst->dirty_tids_[i] == kInvalidTid) {

- CPP_STAT_INC(StatClockReleaseFast);

- dst->dirty_tids_[i] = tid_;

+ dirty->tid = tid_;

+ dirty->epoch = clk_[tid_];

return;

}

// Reset all 'acquired' flags, O(N).

+ // We are going to touch dst elements, so we need to unshare it.

+ dst->Unshare(c);

CPP_STAT_INC(StatClockReleaseSlow);

+ dst->elem(tid_).epoch = clk_[tid_];

for (uptr i = 0; i < dst->size_; i++)

dst->elem(i).reused = 0;

- for (unsigned i = 0; i < kDirtyTids; i++)

- dst->dirty_tids_[i] = kInvalidTid;

+ dst->FlushDirty();

}

-// Checks whether the current threads has already acquired src.

+// Checks whether the current thread has already acquired src.

bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {

if (src->elem(tid_).reused != reused_)

return false;

for (unsigned i = 0; i < kDirtyTids; i++) {

- unsigned tid = src->dirty_tids_[i];

- if (tid != kInvalidTid) {

- if (clk_[tid].epoch < src->elem(tid).epoch)

+ SyncClock::Dirty dirty = src->dirty_[i];

+ if (dirty.tid != kInvalidTid) {

+ if (clk_[dirty.tid] < dirty.epoch)

return false;

}

@@ -302,22 +356,19 @@ bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {

// This function is called only from weird places like AcquireGlobal.

void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {

DCHECK_LT(tid, kMaxTid);

- DCHECK_GE(v, clk_[tid].epoch);

- clk_[tid].epoch = v;

+ DCHECK_GE(v, clk_[tid]);

+ clk_[tid] = v;

if (nclk_ <= tid)

nclk_ = tid + 1;

- last_acquire_ = clk_[tid_].epoch;

+ last_acquire_ = clk_[tid_];

+ ResetCached(c);

}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {

printf("clock=[");

for (uptr i = 0; i < nclk_; i++)

- printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);

- printf("] reused=[");

- for (uptr i = 0; i < nclk_; i++)

- printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);

- printf("] tid=%u/%u last_acq=%llu",

- tid_, reused_, last_acquire_);

+ printf("%s%llu", i == 0 ? "" : ",", clk_[i]);

+ printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);

}

SyncClock::SyncClock() {

@@ -327,22 +378,14 @@ SyncClock::SyncClock() {

SyncClock::~SyncClock() {

// Reset must be called before dtor.

CHECK_EQ(size_, 0);

+ CHECK_EQ(blocks_, 0);

CHECK_EQ(tab_, 0);

CHECK_EQ(tab_idx_, 0);

}

void SyncClock::Reset(ClockCache *c) {

- if (size_ == 0) {

- // nothing

- } else if (size_ <= ClockBlock::kClockCount) {

- // One-level table.

- ctx->clock_alloc.Free(c, tab_idx_);

- } else {

- // Two-level table.

- for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)

- ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);

- ctx->clock_alloc.Free(c, tab_idx_);

- }

+ if (size_)

+ UnrefClockBlock(c, tab_idx_, blocks_);

ResetImpl();

}

@@ -350,66 +393,171 @@ void SyncClock::ResetImpl() {

tab_ = 0;

tab_idx_ = 0;

size_ = 0;

+ blocks_ = 0;

release_store_tid_ = kInvalidTid;

release_store_reused_ = 0;

for (uptr i = 0; i < kDirtyTids; i++)

- dirty_tids_[i] = kInvalidTid;

+ dirty_[i].tid = kInvalidTid;

}

void SyncClock::Resize(ClockCache *c, uptr nclk) {

CPP_STAT_INC(StatClockReleaseResize);

- if (RoundUpTo(nclk, ClockBlock::kClockCount) <=

- RoundUpTo(size_, ClockBlock::kClockCount)) {

- // Growing within the same block.

+ Unshare(c);

+ if (nclk <= capacity()) {

// Memory is already allocated, just increase the size.

size_ = nclk;

return;

}

- if (nclk <= ClockBlock::kClockCount) {

+ if (size_ == 0) {

// Grow from 0 to one-level table.

CHECK_EQ(size_, 0);

+ CHECK_EQ(blocks_, 0);

CHECK_EQ(tab_, 0);

CHECK_EQ(tab_idx_, 0);

- size_ = nclk;

- tab_idx_ = ctx->clock_alloc.Alloc(c);

- tab_ = ctx->clock_alloc.Map(tab_idx_);

- internal_memset(tab_, 0, sizeof(*tab_));

- return;

- }

- // Growing two-level table.

- if (size_ == 0) {

- // Allocate first level table.

- tab_idx_ = ctx->clock_alloc.Alloc(c);

- tab_ = ctx->clock_alloc.Map(tab_idx_);

- internal_memset(tab_, 0, sizeof(*tab_));

- } else if (size_ <= ClockBlock::kClockCount) {

- // Transform one-level table to two-level table.

- u32 old = tab_idx_;

tab_idx_ = ctx->clock_alloc.Alloc(c);

tab_ = ctx->clock_alloc.Map(tab_idx_);

internal_memset(tab_, 0, sizeof(*tab_));

- tab_->table[0] = old;

+ atomic_store_relaxed(ref_ptr(tab_), 1);

+ size_ = 1;

+ } else if (size_ > blocks_ * ClockBlock::kClockCount) {

+ u32 idx = ctx->clock_alloc.Alloc(c);

+ ClockBlock *new_cb = ctx->clock_alloc.Map(idx);

+ uptr top = size_ - blocks_ * ClockBlock::kClockCount;

+ CHECK_LT(top, ClockBlock::kClockCount);

+ const uptr move = top * sizeof(tab_->clock[0]);

+ internal_memcpy(&new_cb->clock[0], tab_->clock, move);

+ internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);

+ internal_memset(tab_->clock, 0, move);

+ append_block(idx);

}

- // At this point we have first level table allocated.

+ // At this point we have first level table allocated and all clock elements

+ // are evacuated from it to a second level block.

// Add second level tables as necessary.

- for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);

- i < nclk; i += ClockBlock::kClockCount) {

+ while (nclk > capacity()) {

u32 idx = ctx->clock_alloc.Alloc(c);

ClockBlock *cb = ctx->clock_alloc.Map(idx);

internal_memset(cb, 0, sizeof(*cb));

- CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);

- tab_->table[i/ClockBlock::kClockCount] = idx;

+ append_block(idx);

}

size_ = nclk;

}

-ClockElem &SyncClock::elem(unsigned tid) const {

+// Flushes all dirty elements into the main clock array.

+void SyncClock::FlushDirty() {

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ Dirty *dirty = &dirty_[i];

+ if (dirty->tid != kInvalidTid) {

+ CHECK_LT(dirty->tid, size_);

+ elem(dirty->tid).epoch = dirty->epoch;

+ dirty->tid = kInvalidTid;

+ }

+bool SyncClock::IsShared() const {

+ if (size_ == 0)

+ return false;

+ atomic_uint32_t *ref = ref_ptr(tab_);

+ u32 v = atomic_load(ref, memory_order_acquire);

+ CHECK_GT(v, 0);

+ return v > 1;

+// Unshares the current clock if it's shared.

+// Shared clocks are immutable, so they need to be unshared before any updates.

+// Note: this does not apply to dirty entries as they are not shared.

+void SyncClock::Unshare(ClockCache *c) {

+ if (!IsShared())

+ return;

+ // First, copy current state into old.

+ SyncClock old;

+ old.tab_ = tab_;

+ old.tab_idx_ = tab_idx_;

+ old.size_ = size_;

+ old.blocks_ = blocks_;

+ old.release_store_tid_ = release_store_tid_;

+ old.release_store_reused_ = release_store_reused_;

+ for (unsigned i = 0; i < kDirtyTids; i++)

+ old.dirty_[i] = dirty_[i];

+ // Then, clear current object.

+ ResetImpl();

+ // Allocate brand new clock in the current object.

+ Resize(c, old.size_);

+ // Now copy state back into this object.

+ Iter old_iter(&old);

+ for (ClockElem &ce : *this) {

+ ce = *old_iter;

+ ++old_iter;

+ }

+ release_store_tid_ = old.release_store_tid_;

+ release_store_reused_ = old.release_store_reused_;

+ for (unsigned i = 0; i < kDirtyTids; i++)

+ dirty_[i] = old.dirty_[i];

+ // Drop reference to old and delete if necessary.

+ old.Reset(c);

+// Can we cache this clock for future release operations?

+ALWAYS_INLINE bool SyncClock::Cachable() const {

+ if (size_ == 0)

+ return false;

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ if (dirty_[i].tid != kInvalidTid)

+ return false;

+ }

+ return atomic_load_relaxed(ref_ptr(tab_)) == 1;

+// elem linearizes the two-level structure into linear array.

+// Note: this is used only for one time accesses, vector operations use

+// the iterator as it is much faster.

+ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {

DCHECK_LT(tid, size_);

- if (size_ <= ClockBlock::kClockCount)

+ const uptr block = tid / ClockBlock::kClockCount;

+ DCHECK_LE(block, blocks_);

+ tid %= ClockBlock::kClockCount;

+ if (block == blocks_)

return tab_->clock[tid];

- u32 idx = tab_->table[tid / ClockBlock::kClockCount];

+ u32 idx = get_block(block);

ClockBlock *cb = ctx->clock_alloc.Map(idx);

- return cb->clock[tid % ClockBlock::kClockCount];

+ return cb->clock[tid];

+ALWAYS_INLINE uptr SyncClock::capacity() const {

+ if (size_ == 0)

+ return 0;

+ uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);

+ // How many clock elements we can fit into the first level block.

+ // +1 for ref counter.

+ uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;

+ return blocks_ * ClockBlock::kClockCount + top;

+ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {

+ DCHECK(size_);

+ DCHECK_LT(bi, blocks_);

+ return tab_->table[ClockBlock::kBlockIdx - bi];

+ALWAYS_INLINE void SyncClock::append_block(u32 idx) {

+ uptr bi = blocks_++;

+ CHECK_EQ(get_block(bi), 0);

+ tab_->table[ClockBlock::kBlockIdx - bi] = idx;

+// Used only by tests.

+u64 SyncClock::get(unsigned tid) const {

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ Dirty dirty = dirty_[i];

+ if (dirty.tid == tid)

+ return dirty.epoch;

+ }

+ return elem(tid).epoch;

+// Used only by Iter test.

+u64 SyncClock::get_clean(unsigned tid) const {

+ return elem(tid).epoch;

}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {

@@ -419,8 +567,32 @@ void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {

printf("] reused=[");

for (uptr i = 0; i < size_; i++)

printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);

- printf("] release_store_tid=%d/%d dirty_tids=%d/%d",

+ printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",

release_store_tid_, release_store_reused_,

- dirty_tids_[0], dirty_tids_[1]);

+ dirty_[0].tid, dirty_[0].epoch,

+ dirty_[1].tid, dirty_[1].epoch);

+void SyncClock::Iter::Next() {

+ // Finished with the current block, move on to the next one.

+ block_++;

+ if (block_ < parent_->blocks_) {

+ // Iterate over the next second level block.

+ u32 idx = parent_->get_block(block_);

+ ClockBlock *cb = ctx->clock_alloc.Map(idx);

+ pos_ = &cb->clock[0];

+ end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,

+ ClockBlock::kClockCount);

+ return;

+ }

+ if (block_ == parent_->blocks_ &&

+ parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {

+ // Iterate over elements in the first level block.

+ pos_ = &parent_->tab_->clock[0];

+ end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,

+ ClockBlock::kClockCount);

+ return;

+ }

+ parent_ = nullptr; // denotes end

}

} // namespace __tsan

diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
index 378b550fd11b..a891d7bbd889 100644
--- a/lib/tsan/rtl/tsan_clock.h
+++ b/lib/tsan/rtl/tsan_clock.h

@@ -18,25 +18,6 @@

namespace __tsan {

-struct ClockElem {

- u64 epoch : kClkBits;

- u64 reused : 64 - kClkBits;

-};

-struct ClockBlock {

- static const uptr kSize = 512;

- static const uptr kTableSize = kSize / sizeof(u32);

- static const uptr kClockCount = kSize / sizeof(ClockElem);

- union {

- u32 table[kTableSize];

- ClockElem clock[kClockCount];

- };

- ClockBlock() {

- }

-};

typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;

typedef DenseSlabAllocCache ClockCache;

@@ -46,69 +27,117 @@ class SyncClock {

SyncClock();

~SyncClock();

- uptr size() const {

- return size_;

- }

+ uptr size() const;

- u64 get(unsigned tid) const {

- return elem(tid).epoch;

- }

+ // These are used only in tests.

+ u64 get(unsigned tid) const;

+ u64 get_clean(unsigned tid) const;

void Resize(ClockCache *c, uptr nclk);

void Reset(ClockCache *c);

void DebugDump(int(*printf)(const char *s, ...));

+ // Clock element iterator.

+ // Note: it iterates only over the table without regard to dirty entries.

+ class Iter {

+ public:

+ explicit Iter(SyncClock* parent);

+ Iter& operator++();

+ bool operator!=(const Iter& other);

+ ClockElem &operator*();

+ private:

+ SyncClock *parent_;

+ // [pos_, end_) is the current continuous range of clock elements.

+ ClockElem *pos_;

+ ClockElem *end_;

+ int block_; // Current number of second level block.

+ NOINLINE void Next();

+ };

+ Iter begin();

+ Iter end();

private:

- friend struct ThreadClock;

+ friend class ThreadClock;

+ friend class Iter;

static const uptr kDirtyTids = 2;

+ struct Dirty {

+ u64 epoch : kClkBits;

+ u64 tid : 64 - kClkBits; // kInvalidTid if not active

+ };

unsigned release_store_tid_;

unsigned release_store_reused_;

- unsigned dirty_tids_[kDirtyTids];

- // tab_ contains indirect pointer to a 512b block using DenseSlabAlloc.

- // If size_ <= 64, then tab_ points to an array with 64 ClockElem's.

- // Otherwise, tab_ points to an array with 128 u32 elements,

+ Dirty dirty_[kDirtyTids];

+ // If size_ is 0, tab_ is nullptr.

+ // If size <= 64 (kClockCount), tab_ contains pointer to an array with

+ // 64 ClockElem's (ClockBlock::clock).

+ // Otherwise, tab_ points to an array with up to 127 u32 elements,

// each pointing to the second-level 512b block with 64 ClockElem's.

+ // Unused space in the first level ClockBlock is used to store additional

+ // clock elements.

+ // The last u32 element in the first level ClockBlock is always used as

+ // reference counter.

+ //

+ // See the following scheme for details.

+ // All memory blocks are 512 bytes (allocated from ClockAlloc).

+ // Clock (clk) elements are 64 bits.

+ // Idx and ref are 32 bits.

+ //

+ // tab_

+ // |

+ // \/

+ // +----------------------------------------------------+

+ // | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |

+ // +----------------------------------------------------+

+ // | |

+ // | \/

+ // | +----------------+

+ // | | clk0 ... clk63 |

+ // | +----------------+

+ // \/

+ // +------------------+

+ // | clk64 ... clk127 |

+ // +------------------+

+ //

+ // Note: dirty entries, if active, always override what's stored in the clock.

ClockBlock *tab_;

u32 tab_idx_;

- u32 size_;

+ u16 size_;

+ u16 blocks_; // Number of second level blocks.

+ void Unshare(ClockCache *c);

+ bool IsShared() const;

+ bool Cachable() const;

void ResetImpl();

+ void FlushDirty();

+ uptr capacity() const;

+ u32 get_block(uptr bi) const;

+ void append_block(u32 idx);

ClockElem &elem(unsigned tid) const;

};

// The clock that lives in threads.

-struct ThreadClock {

+class ThreadClock {

public:

typedef DenseSlabAllocCache Cache;

explicit ThreadClock(unsigned tid, unsigned reused = 0);

- u64 get(unsigned tid) const {

- DCHECK_LT(tid, kMaxTidInClock);

- return clk_[tid].epoch;

- }

+ u64 get(unsigned tid) const;

void set(ClockCache *c, unsigned tid, u64 v);

+ void set(u64 v);

+ void tick();

+ uptr size() const;

- void set(u64 v) {

- DCHECK_GE(v, clk_[tid_].epoch);

- clk_[tid_].epoch = v;

- }

- void tick() {

- clk_[tid_].epoch++;

- }

- uptr size() const {

- return nclk_;

- }

- void acquire(ClockCache *c, const SyncClock *src);

- void release(ClockCache *c, SyncClock *dst) const;

+ void acquire(ClockCache *c, SyncClock *src);

+ void release(ClockCache *c, SyncClock *dst);

void acq_rel(ClockCache *c, SyncClock *dst);

- void ReleaseStore(ClockCache *c, SyncClock *dst) const;

+ void ReleaseStore(ClockCache *c, SyncClock *dst);

void ResetCached(ClockCache *c);

void DebugReset();

@@ -116,16 +145,82 @@ struct ThreadClock {

private:

static const uptr kDirtyTids = SyncClock::kDirtyTids;

+ // Index of the thread associated with the clock ("current thread").

const unsigned tid_;

- const unsigned reused_;

+ const unsigned reused_; // tid_ reuse count.

+ // Current thread time when it acquired something from other threads.

u64 last_acquire_;

+ // Cached SyncClock (without dirty entries and release_store_tid_).

+ // We reuse it for subsequent store-release operations without intervening

+ // acquire operations. Since it is shared (and thus constant), clock value

+ // for the current thread is then stored in dirty entries in the SyncClock.

+ // We hold a reference to the table while it is cached here.

+ u32 cached_idx_;

+ u16 cached_size_;

+ u16 cached_blocks_;

+ // Number of active elements in the clk_ table (the rest is zeros).

uptr nclk_;

- ClockElem clk_[kMaxTidInClock];

+ u64 clk_[kMaxTidInClock]; // Fixed size vector clock.

bool IsAlreadyAcquired(const SyncClock *src) const;

- void UpdateCurrentThread(SyncClock *dst) const;

+ void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;

};

+ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {

+ DCHECK_LT(tid, kMaxTidInClock);

+ return clk_[tid];

+ALWAYS_INLINE void ThreadClock::set(u64 v) {

+ DCHECK_GE(v, clk_[tid_]);

+ clk_[tid_] = v;

+ALWAYS_INLINE void ThreadClock::tick() {

+ clk_[tid_]++;

+ALWAYS_INLINE uptr ThreadClock::size() const {

+ return nclk_;

+ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {

+ return Iter(this);

+ALWAYS_INLINE SyncClock::Iter SyncClock::end() {

+ return Iter(nullptr);

+ALWAYS_INLINE uptr SyncClock::size() const {

+ return size_;

+ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)

+ : parent_(parent)

+ , pos_(nullptr)

+ , end_(nullptr)

+ , block_(-1) {

+ if (parent)

+ Next();

+ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {

+ pos_++;

+ if (UNLIKELY(pos_ >= end_))

+ Next();

+ return *this;

+ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {

+ return parent_ != other.parent_;

+ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {

+ return *pos_;

} // namespace __tsan

#endif // TSAN_CLOCK_H

diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 8977fea7c552..3c775debfb09 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h

@@ -38,15 +38,40 @@

namespace __tsan {

+const int kClkBits = 42;

+const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;

+struct ClockElem {

+ u64 epoch : kClkBits;

+ u64 reused : 64 - kClkBits; // tid reuse count

+};

+struct ClockBlock {

+ static const uptr kSize = 512;

+ static const uptr kTableSize = kSize / sizeof(u32);

+ static const uptr kClockCount = kSize / sizeof(ClockElem);

+ static const uptr kRefIdx = kTableSize - 1;

+ static const uptr kBlockIdx = kTableSize - 2;

+ union {

+ u32 table[kTableSize];

+ ClockElem clock[kClockCount];

+ };

+ ClockBlock() {

+ }

+};

const int kTidBits = 13;

-const unsigned kMaxTid = 1 << kTidBits;

+// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is

+// occupied by reference counter, so total number of elements we can store

+// in SyncClock is kClockCount * (kTableSize - 1).

+const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;

#if !SANITIZER_GO

const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit.

#else

const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory.

#endif

-const int kClkBits = 42;

-const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;

const uptr kShadowStackSize = 64 * 1024;

// Count of shadow values in a shadow cell.

@@ -74,7 +99,7 @@ const bool kCollectHistory = false;

const bool kCollectHistory = true;

#endif

-const unsigned kInvalidTid = (unsigned)-1;

+const u16 kInvalidTid = kMaxTid + 1;

// The following "build consistency" machinery ensures that all source files

// are built in the same configuration. Inconsistent builds lead to

diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index 1434cf688ce9..f79dccddba9f 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc

@@ -10,6 +10,7 @@

// This file is a part of ThreadSanitizer (TSan), a race detector.

//===----------------------------------------------------------------------===//

+#include "sanitizer_common/sanitizer_allocator_checks.h"

#include "sanitizer_common/sanitizer_allocator_interface.h"

#include "sanitizer_common/sanitizer_common.h"

#include "sanitizer_common/sanitizer_placement_new.h"

diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 0ba01babe69a..ead1e5704989 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc

@@ -286,7 +286,7 @@ void InitializePlatform() {

int ExtractResolvFDs(void *state, int *fds, int nfd) {

#if SANITIZER_LINUX && !SANITIZER_ANDROID

int cnt = 0;

- __res_state *statp = (__res_state*)state;

+ struct __res_state *statp = (struct __res_state*)state;

for (int i = 0; i < MAXNS && cnt < nfd; i++) {

if (statp->_u._ext.nsaddrs[i] && statp->_u._ext.nssocks[i] != -1)

fds[cnt++] = statp->_u._ext.nssocks[i];

diff --git a/lib/tsan/tests/unit/tsan_clock_test.cc b/lib/tsan/tests/unit/tsan_clock_test.cc
index 73104dd6b9d4..f6230e1be565 100644
--- a/lib/tsan/tests/unit/tsan_clock_test.cc
+++ b/lib/tsan/tests/unit/tsan_clock_test.cc

@@ -53,6 +53,31 @@ TEST(Clock, ChunkedBasic) {

chunked.Reset(&cache);

}

+static const uptr interesting_sizes[] = {0, 1, 2, 30, 61, 62, 63, 64, 65, 66,

+ 100, 124, 125, 126, 127, 128, 129, 130, 188, 189, 190, 191, 192, 193, 254,

+ 255};

+TEST(Clock, Iter) {

+ const uptr n = ARRAY_SIZE(interesting_sizes);

+ for (uptr fi = 0; fi < n; fi++) {

+ const uptr size = interesting_sizes[fi];

+ SyncClock sync;

+ ThreadClock vector(0);

+ for (uptr i = 0; i < size; i++)

+ vector.set(&cache, i, i + 1);

+ if (size != 0)

+ vector.release(&cache, &sync);

+ uptr i = 0;

+ for (ClockElem &ce : sync) {

+ ASSERT_LT(i, size);

+ ASSERT_EQ(sync.get_clean(i), ce.epoch);

+ i++;

+ }

+ ASSERT_EQ(i, size);

+ sync.Reset(&cache);

+ }

TEST(Clock, AcquireRelease) {

ThreadClock vector1(100);

vector1.tick();

@@ -216,13 +241,11 @@ TEST(Clock, Growth) {

TEST(Clock, Growth2) {

// Test clock growth for every pair of sizes:

- const uptr sizes[] = {0, 1, 2, 30, 61, 62, 63, 64, 65, 66, 100, 124, 125, 126,

- 127, 128, 129, 130, 188, 189, 190, 191, 192, 193, 254, 255};

- const uptr n = sizeof(sizes) / sizeof(sizes[0]);

+ const uptr n = ARRAY_SIZE(interesting_sizes);

for (uptr fi = 0; fi < n; fi++) {

for (uptr ti = fi + 1; ti < n; ti++) {

- const uptr from = sizes[fi];

- const uptr to = sizes[ti];

+ const uptr from = interesting_sizes[fi];

+ const uptr to = interesting_sizes[ti];

SyncClock sync;

ThreadClock vector(0);

for (uptr i = 0; i < from; i++)