aboutsummaryrefslogtreecommitdiff
path: root/lib/tsan
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-07-19 07:02:40 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-07-19 07:02:40 +0000
commit462d72ec21756f5ca1002f128f014d36a845f3d2 (patch)
treeb3d61e87f3a1b79e57d9639497994df822687bf0 /lib/tsan
parent1992b790c2c12b7850bdf86662b67302052ec2fe (diff)
downloadsrc-462d72ec21756f5ca1002f128f014d36a845f3d2.tar.gz
src-462d72ec21756f5ca1002f128f014d36a845f3d2.zip
Notes
Notes: svn path=/vendor/compiler-rt/dist/; revision=321188 svn path=/vendor/compiler-rt/compiler-rt-release_50-r311219/; revision=322731; tag=vendor/compiler-rt/compiler-rt-release_50-r311219
Diffstat (limited to 'lib/tsan')
-rw-r--r--lib/tsan/rtl/tsan_clock.cc436
-rw-r--r--lib/tsan/rtl/tsan_clock.h207
-rw-r--r--lib/tsan/rtl/tsan_defs.h33
-rw-r--r--lib/tsan/rtl/tsan_mman.cc1
-rw-r--r--lib/tsan/rtl/tsan_platform_linux.cc2
-rw-r--r--lib/tsan/tests/unit/tsan_clock_test.cc33
6 files changed, 514 insertions, 198 deletions
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 9ee9104283f8..ef984a45cd9d 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -61,20 +61,13 @@
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
-// Description of ThreadClock state:
-// clk_ - fixed size vector clock.
-// nclk_ - effective size of the vector clock (the rest is zeros).
-// tid_ - index of the thread associated with he clock ("current thread").
-// last_acquire_ - current thread time when it acquired something from
-// other threads.
-//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
// the remaining bits hold "acquired" flag (the actual value is thread's
// reused counter);
// if acquried == thr->reused_, then the respective thread has already
-// acquired this clock (except possibly dirty_tids_).
-// dirty_tids_ - holds up to two indeces in the vector clock that other threads
+// acquired this clock (except possibly for dirty elements).
+// dirty_ - holds up to two indeces in the vector clock that other threads
// need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
// release-store operation by the thread with release_store_tid_ index.
@@ -90,21 +83,51 @@
namespace __tsan {
+static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
+ return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
+}
+
+// Drop reference to the first level block idx.
+static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
+ ClockBlock *cb = ctx->clock_alloc.Map(idx);
+ atomic_uint32_t *ref = ref_ptr(cb);
+ u32 v = atomic_load(ref, memory_order_acquire);
+ for (;;) {
+ CHECK_GT(v, 0);
+ if (v == 1)
+ break;
+ if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
+ return;
+ }
+ // First level block owns second level blocks, so them as well.
+ for (uptr i = 0; i < blocks; i++)
+ ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
+ ctx->clock_alloc.Free(c, idx);
+}
+
ThreadClock::ThreadClock(unsigned tid, unsigned reused)
: tid_(tid)
- , reused_(reused + 1) { // 0 has special meaning
+ , reused_(reused + 1) // 0 has special meaning
+ , cached_idx_()
+ , cached_size_()
+ , cached_blocks_() {
CHECK_LT(tid, kMaxTidInClock);
CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
nclk_ = tid_ + 1;
last_acquire_ = 0;
internal_memset(clk_, 0, sizeof(clk_));
- clk_[tid_].reused = reused_;
}
void ThreadClock::ResetCached(ClockCache *c) {
+ if (cached_idx_) {
+ UnrefClockBlock(c, cached_idx_, cached_blocks_);
+ cached_idx_ = 0;
+ cached_size_ = 0;
+ cached_blocks_ = 0;
+ }
}
-void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
+void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
DCHECK_LE(nclk_, kMaxTid);
DCHECK_LE(src->size_, kMaxTid);
CPP_STAT_INC(StatClockAcquire);
@@ -116,50 +139,46 @@ void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
return;
}
- // Check if we've already acquired src after the last release operation on src
bool acquired = false;
- if (nclk > tid_) {
- if (src->elem(tid_).reused == reused_) {
- for (unsigned i = 0; i < kDirtyTids; i++) {
- unsigned tid = src->dirty_tids_[i];
- if (tid != kInvalidTid) {
- u64 epoch = src->elem(tid).epoch;
- if (clk_[tid].epoch < epoch) {
- clk_[tid].epoch = epoch;
- acquired = true;
- }
- }
- }
- if (acquired) {
- CPP_STAT_INC(StatClockAcquiredSomething);
- last_acquire_ = clk_[tid_].epoch;
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ SyncClock::Dirty dirty = src->dirty_[i];
+ unsigned tid = dirty.tid;
+ if (tid != kInvalidTid) {
+ if (clk_[tid] < dirty.epoch) {
+ clk_[tid] = dirty.epoch;
+ acquired = true;
}
- return;
}
}
- // O(N) acquire.
- CPP_STAT_INC(StatClockAcquireFull);
- nclk_ = max(nclk_, nclk);
- for (uptr i = 0; i < nclk; i++) {
- u64 epoch = src->elem(i).epoch;
- if (clk_[i].epoch < epoch) {
- clk_[i].epoch = epoch;
- acquired = true;
+ // Check if we've already acquired src after the last release operation on src
+ if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
+ // O(N) acquire.
+ CPP_STAT_INC(StatClockAcquireFull);
+ nclk_ = max(nclk_, nclk);
+ u64 *dst_pos = &clk_[0];
+ for (ClockElem &src_elem : *src) {
+ u64 epoch = src_elem.epoch;
+ if (*dst_pos < epoch) {
+ *dst_pos = epoch;
+ acquired = true;
+ }
+ dst_pos++;
}
- }
- // Remember that this thread has acquired this clock.
- if (nclk > tid_)
- src->elem(tid_).reused = reused_;
+ // Remember that this thread has acquired this clock.
+ if (nclk > tid_)
+ src->elem(tid_).reused = reused_;
+ }
if (acquired) {
CPP_STAT_INC(StatClockAcquiredSomething);
- last_acquire_ = clk_[tid_].epoch;
+ last_acquire_ = clk_[tid_];
+ ResetCached(c);
}
}
-void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
+void ThreadClock::release(ClockCache *c, SyncClock *dst) {
DCHECK_LE(nclk_, kMaxTid);
DCHECK_LE(dst->size_, kMaxTid);
@@ -179,7 +198,7 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
// since the last release on dst. If so, we need to update
// only dst->elem(tid_).
if (dst->elem(tid_).epoch > last_acquire_) {
- UpdateCurrentThread(dst);
+ UpdateCurrentThread(c, dst);
if (dst->release_store_tid_ != tid_ ||
dst->release_store_reused_ != reused_)
dst->release_store_tid_ = kInvalidTid;
@@ -188,23 +207,24 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
// O(N) release.
CPP_STAT_INC(StatClockReleaseFull);
+ dst->Unshare(c);
// First, remember whether we've acquired dst.
bool acquired = IsAlreadyAcquired(dst);
if (acquired)
CPP_STAT_INC(StatClockReleaseAcquired);
// Update dst->clk_.
- for (uptr i = 0; i < nclk_; i++) {
- ClockElem &ce = dst->elem(i);
- ce.epoch = max(ce.epoch, clk_[i].epoch);
+ dst->FlushDirty();
+ uptr i = 0;
+ for (ClockElem &ce : *dst) {
+ ce.epoch = max(ce.epoch, clk_[i]);
ce.reused = 0;
+ i++;
}
// Clear 'acquired' flag in the remaining elements.
if (nclk_ < dst->size_)
CPP_STAT_INC(StatClockReleaseClearTail);
for (uptr i = nclk_; i < dst->size_; i++)
dst->elem(i).reused = 0;
- for (unsigned i = 0; i < kDirtyTids; i++)
- dst->dirty_tids_[i] = kInvalidTid;
dst->release_store_tid_ = kInvalidTid;
dst->release_store_reused_ = 0;
// If we've acquired dst, remember this fact,
@@ -213,11 +233,37 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
dst->elem(tid_).reused = reused_;
}
-void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
DCHECK_LE(nclk_, kMaxTid);
DCHECK_LE(dst->size_, kMaxTid);
CPP_STAT_INC(StatClockStore);
+ if (dst->size_ == 0 && cached_idx_ != 0) {
+ // Reuse the cached clock.
+ // Note: we could reuse/cache the cached clock in more cases:
+ // we could update the existing clock and cache it, or replace it with the
+ // currently cached clock and release the old one. And for a shared
+ // existing clock, we could replace it with the currently cached;
+ // or unshare, update and cache. But, for simplicity, we currnetly reuse
+ // cached clock only when the target clock is empty.
+ dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
+ dst->tab_idx_ = cached_idx_;
+ dst->size_ = cached_size_;
+ dst->blocks_ = cached_blocks_;
+ CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
+ // The cached clock is shared (immutable),
+ // so this is where we store the current clock.
+ dst->dirty_[0].tid = tid_;
+ dst->dirty_[0].epoch = clk_[tid_];
+ dst->release_store_tid_ = tid_;
+ dst->release_store_reused_ = reused_;
+ // Rememeber that we don't need to acquire it in future.
+ dst->elem(tid_).reused = reused_;
+ // Grab a reference.
+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
+ return;
+ }
+
// Check if we need to resize dst.
if (dst->size_ < nclk_)
dst->Resize(c, nclk_);
@@ -226,32 +272,41 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
dst->release_store_reused_ == reused_ &&
dst->elem(tid_).epoch > last_acquire_) {
CPP_STAT_INC(StatClockStoreFast);
- UpdateCurrentThread(dst);
+ UpdateCurrentThread(c, dst);
return;
}
// O(N) release-store.
CPP_STAT_INC(StatClockStoreFull);
- for (uptr i = 0; i < nclk_; i++) {
- ClockElem &ce = dst->elem(i);
- ce.epoch = clk_[i].epoch;
+ dst->Unshare(c);
+ // Note: dst can be larger than this ThreadClock.
+ // This is fine since clk_ beyond size is all zeros.
+ uptr i = 0;
+ for (ClockElem &ce : *dst) {
+ ce.epoch = clk_[i];
ce.reused = 0;
+ i++;
}
- // Clear the tail of dst->clk_.
- if (nclk_ < dst->size_) {
- for (uptr i = nclk_; i < dst->size_; i++) {
- ClockElem &ce = dst->elem(i);
- ce.epoch = 0;
- ce.reused = 0;
- }
- CPP_STAT_INC(StatClockStoreTail);
- }
- for (unsigned i = 0; i < kDirtyTids; i++)
- dst->dirty_tids_[i] = kInvalidTid;
+ for (uptr i = 0; i < kDirtyTids; i++)
+ dst->dirty_[i].tid = kInvalidTid;
dst->release_store_tid_ = tid_;
dst->release_store_reused_ = reused_;
// Rememeber that we don't need to acquire it in future.
dst->elem(tid_).reused = reused_;
+
+ // If the resulting clock is cachable, cache it for future release operations.
+ // The clock is always cachable if we released to an empty sync object.
+ if (cached_idx_ == 0 && dst->Cachable()) {
+ // Grab a reference to the ClockBlock.
+ atomic_uint32_t *ref = ref_ptr(dst->tab_);
+ if (atomic_load(ref, memory_order_acquire) == 1)
+ atomic_store_relaxed(ref, 2);
+ else
+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
+ cached_idx_ = dst->tab_idx_;
+ cached_size_ = dst->size_;
+ cached_blocks_ = dst->blocks_;
+ }
}
void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
@@ -261,37 +316,36 @@ void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
}
// Updates only single element related to the current thread in dst->clk_.
-void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
+void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
// Update the threads time, but preserve 'acquired' flag.
- dst->elem(tid_).epoch = clk_[tid_].epoch;
-
for (unsigned i = 0; i < kDirtyTids; i++) {
- if (dst->dirty_tids_[i] == tid_) {
+ SyncClock::Dirty *dirty = &dst->dirty_[i];
+ const unsigned tid = dirty->tid;
+ if (tid == tid_ || tid == kInvalidTid) {
CPP_STAT_INC(StatClockReleaseFast);
- return;
- }
- if (dst->dirty_tids_[i] == kInvalidTid) {
- CPP_STAT_INC(StatClockReleaseFast);
- dst->dirty_tids_[i] = tid_;
+ dirty->tid = tid_;
+ dirty->epoch = clk_[tid_];
return;
}
}
// Reset all 'acquired' flags, O(N).
+ // We are going to touch dst elements, so we need to unshare it.
+ dst->Unshare(c);
CPP_STAT_INC(StatClockReleaseSlow);
+ dst->elem(tid_).epoch = clk_[tid_];
for (uptr i = 0; i < dst->size_; i++)
dst->elem(i).reused = 0;
- for (unsigned i = 0; i < kDirtyTids; i++)
- dst->dirty_tids_[i] = kInvalidTid;
+ dst->FlushDirty();
}
-// Checks whether the current threads has already acquired src.
+// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
if (src->elem(tid_).reused != reused_)
return false;
for (unsigned i = 0; i < kDirtyTids; i++) {
- unsigned tid = src->dirty_tids_[i];
- if (tid != kInvalidTid) {
- if (clk_[tid].epoch < src->elem(tid).epoch)
+ SyncClock::Dirty dirty = src->dirty_[i];
+ if (dirty.tid != kInvalidTid) {
+ if (clk_[dirty.tid] < dirty.epoch)
return false;
}
}
@@ -302,22 +356,19 @@ bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
DCHECK_LT(tid, kMaxTid);
- DCHECK_GE(v, clk_[tid].epoch);
- clk_[tid].epoch = v;
+ DCHECK_GE(v, clk_[tid]);
+ clk_[tid] = v;
if (nclk_ <= tid)
nclk_ = tid + 1;
- last_acquire_ = clk_[tid_].epoch;
+ last_acquire_ = clk_[tid_];
+ ResetCached(c);
}
void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
printf("clock=[");
for (uptr i = 0; i < nclk_; i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
- printf("] reused=[");
- for (uptr i = 0; i < nclk_; i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
- printf("] tid=%u/%u last_acq=%llu",
- tid_, reused_, last_acquire_);
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
+ printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}
SyncClock::SyncClock() {
@@ -327,22 +378,14 @@ SyncClock::SyncClock() {
SyncClock::~SyncClock() {
// Reset must be called before dtor.
CHECK_EQ(size_, 0);
+ CHECK_EQ(blocks_, 0);
CHECK_EQ(tab_, 0);
CHECK_EQ(tab_idx_, 0);
}
void SyncClock::Reset(ClockCache *c) {
- if (size_ == 0) {
- // nothing
- } else if (size_ <= ClockBlock::kClockCount) {
- // One-level table.
- ctx->clock_alloc.Free(c, tab_idx_);
- } else {
- // Two-level table.
- for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
- ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
- ctx->clock_alloc.Free(c, tab_idx_);
- }
+ if (size_)
+ UnrefClockBlock(c, tab_idx_, blocks_);
ResetImpl();
}
@@ -350,66 +393,171 @@ void SyncClock::ResetImpl() {
tab_ = 0;
tab_idx_ = 0;
size_ = 0;
+ blocks_ = 0;
release_store_tid_ = kInvalidTid;
release_store_reused_ = 0;
for (uptr i = 0; i < kDirtyTids; i++)
- dirty_tids_[i] = kInvalidTid;
+ dirty_[i].tid = kInvalidTid;
}
void SyncClock::Resize(ClockCache *c, uptr nclk) {
CPP_STAT_INC(StatClockReleaseResize);
- if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
- RoundUpTo(size_, ClockBlock::kClockCount)) {
- // Growing within the same block.
+ Unshare(c);
+ if (nclk <= capacity()) {
// Memory is already allocated, just increase the size.
size_ = nclk;
return;
}
- if (nclk <= ClockBlock::kClockCount) {
+ if (size_ == 0) {
// Grow from 0 to one-level table.
CHECK_EQ(size_, 0);
+ CHECK_EQ(blocks_, 0);
CHECK_EQ(tab_, 0);
CHECK_EQ(tab_idx_, 0);
- size_ = nclk;
- tab_idx_ = ctx->clock_alloc.Alloc(c);
- tab_ = ctx->clock_alloc.Map(tab_idx_);
- internal_memset(tab_, 0, sizeof(*tab_));
- return;
- }
- // Growing two-level table.
- if (size_ == 0) {
- // Allocate first level table.
- tab_idx_ = ctx->clock_alloc.Alloc(c);
- tab_ = ctx->clock_alloc.Map(tab_idx_);
- internal_memset(tab_, 0, sizeof(*tab_));
- } else if (size_ <= ClockBlock::kClockCount) {
- // Transform one-level table to two-level table.
- u32 old = tab_idx_;
tab_idx_ = ctx->clock_alloc.Alloc(c);
tab_ = ctx->clock_alloc.Map(tab_idx_);
internal_memset(tab_, 0, sizeof(*tab_));
- tab_->table[0] = old;
+ atomic_store_relaxed(ref_ptr(tab_), 1);
+ size_ = 1;
+ } else if (size_ > blocks_ * ClockBlock::kClockCount) {
+ u32 idx = ctx->clock_alloc.Alloc(c);
+ ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
+ uptr top = size_ - blocks_ * ClockBlock::kClockCount;
+ CHECK_LT(top, ClockBlock::kClockCount);
+ const uptr move = top * sizeof(tab_->clock[0]);
+ internal_memcpy(&new_cb->clock[0], tab_->clock, move);
+ internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
+ internal_memset(tab_->clock, 0, move);
+ append_block(idx);
}
- // At this point we have first level table allocated.
+ // At this point we have first level table allocated and all clock elements
+ // are evacuated from it to a second level block.
// Add second level tables as necessary.
- for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
- i < nclk; i += ClockBlock::kClockCount) {
+ while (nclk > capacity()) {
u32 idx = ctx->clock_alloc.Alloc(c);
ClockBlock *cb = ctx->clock_alloc.Map(idx);
internal_memset(cb, 0, sizeof(*cb));
- CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
- tab_->table[i/ClockBlock::kClockCount] = idx;
+ append_block(idx);
}
size_ = nclk;
}
-ClockElem &SyncClock::elem(unsigned tid) const {
+// Flushes all dirty elements into the main clock array.
+void SyncClock::FlushDirty() {
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ Dirty *dirty = &dirty_[i];
+ if (dirty->tid != kInvalidTid) {
+ CHECK_LT(dirty->tid, size_);
+ elem(dirty->tid).epoch = dirty->epoch;
+ dirty->tid = kInvalidTid;
+ }
+ }
+}
+
+bool SyncClock::IsShared() const {
+ if (size_ == 0)
+ return false;
+ atomic_uint32_t *ref = ref_ptr(tab_);
+ u32 v = atomic_load(ref, memory_order_acquire);
+ CHECK_GT(v, 0);
+ return v > 1;
+}
+
+// Unshares the current clock if it's shared.
+// Shared clocks are immutable, so they need to be unshared before any updates.
+// Note: this does not apply to dirty entries as they are not shared.
+void SyncClock::Unshare(ClockCache *c) {
+ if (!IsShared())
+ return;
+ // First, copy current state into old.
+ SyncClock old;
+ old.tab_ = tab_;
+ old.tab_idx_ = tab_idx_;
+ old.size_ = size_;
+ old.blocks_ = blocks_;
+ old.release_store_tid_ = release_store_tid_;
+ old.release_store_reused_ = release_store_reused_;
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ old.dirty_[i] = dirty_[i];
+ // Then, clear current object.
+ ResetImpl();
+ // Allocate brand new clock in the current object.
+ Resize(c, old.size_);
+ // Now copy state back into this object.
+ Iter old_iter(&old);
+ for (ClockElem &ce : *this) {
+ ce = *old_iter;
+ ++old_iter;
+ }
+ release_store_tid_ = old.release_store_tid_;
+ release_store_reused_ = old.release_store_reused_;
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dirty_[i] = old.dirty_[i];
+ // Drop reference to old and delete if necessary.
+ old.Reset(c);
+}
+
+// Can we cache this clock for future release operations?
+ALWAYS_INLINE bool SyncClock::Cachable() const {
+ if (size_ == 0)
+ return false;
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ if (dirty_[i].tid != kInvalidTid)
+ return false;
+ }
+ return atomic_load_relaxed(ref_ptr(tab_)) == 1;
+}
+
+// elem linearizes the two-level structure into linear array.
+// Note: this is used only for one time accesses, vector operations use
+// the iterator as it is much faster.
+ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
DCHECK_LT(tid, size_);
- if (size_ <= ClockBlock::kClockCount)
+ const uptr block = tid / ClockBlock::kClockCount;
+ DCHECK_LE(block, blocks_);
+ tid %= ClockBlock::kClockCount;
+ if (block == blocks_)
return tab_->clock[tid];
- u32 idx = tab_->table[tid / ClockBlock::kClockCount];
+ u32 idx = get_block(block);
ClockBlock *cb = ctx->clock_alloc.Map(idx);
- return cb->clock[tid % ClockBlock::kClockCount];
+ return cb->clock[tid];
+}
+
+ALWAYS_INLINE uptr SyncClock::capacity() const {
+ if (size_ == 0)
+ return 0;
+ uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
+ // How many clock elements we can fit into the first level block.
+ // +1 for ref counter.
+ uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
+ return blocks_ * ClockBlock::kClockCount + top;
+}
+
+ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
+ DCHECK(size_);
+ DCHECK_LT(bi, blocks_);
+ return tab_->table[ClockBlock::kBlockIdx - bi];
+}
+
+ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
+ uptr bi = blocks_++;
+ CHECK_EQ(get_block(bi), 0);
+ tab_->table[ClockBlock::kBlockIdx - bi] = idx;
+}
+
+// Used only by tests.
+u64 SyncClock::get(unsigned tid) const {
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ Dirty dirty = dirty_[i];
+ if (dirty.tid == tid)
+ return dirty.epoch;
+ }
+ return elem(tid).epoch;
+}
+
+// Used only by Iter test.
+u64 SyncClock::get_clean(unsigned tid) const {
+ return elem(tid).epoch;
}
void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
@@ -419,8 +567,32 @@ void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
printf("] reused=[");
for (uptr i = 0; i < size_; i++)
printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
- printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
+ printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
release_store_tid_, release_store_reused_,
- dirty_tids_[0], dirty_tids_[1]);
+ dirty_[0].tid, dirty_[0].epoch,
+ dirty_[1].tid, dirty_[1].epoch);
+}
+
+void SyncClock::Iter::Next() {
+ // Finished with the current block, move on to the next one.
+ block_++;
+ if (block_ < parent_->blocks_) {
+ // Iterate over the next second level block.
+ u32 idx = parent_->get_block(block_);
+ ClockBlock *cb = ctx->clock_alloc.Map(idx);
+ pos_ = &cb->clock[0];
+ end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
+ ClockBlock::kClockCount);
+ return;
+ }
+ if (block_ == parent_->blocks_ &&
+ parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
+ // Iterate over elements in the first level block.
+ pos_ = &parent_->tab_->clock[0];
+ end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
+ ClockBlock::kClockCount);
+ return;
+ }
+ parent_ = nullptr; // denotes end
}
} // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
index 378b550fd11b..a891d7bbd889 100644
--- a/lib/tsan/rtl/tsan_clock.h
+++ b/lib/tsan/rtl/tsan_clock.h
@@ -18,25 +18,6 @@
namespace __tsan {
-struct ClockElem {
- u64 epoch : kClkBits;
- u64 reused : 64 - kClkBits;
-};
-
-struct ClockBlock {
- static const uptr kSize = 512;
- static const uptr kTableSize = kSize / sizeof(u32);
- static const uptr kClockCount = kSize / sizeof(ClockElem);
-
- union {
- u32 table[kTableSize];
- ClockElem clock[kClockCount];
- };
-
- ClockBlock() {
- }
-};
-
typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;
@@ -46,69 +27,117 @@ class SyncClock {
SyncClock();
~SyncClock();
- uptr size() const {
- return size_;
- }
+ uptr size() const;
- u64 get(unsigned tid) const {
- return elem(tid).epoch;
- }
+ // These are used only in tests.
+ u64 get(unsigned tid) const;
+ u64 get_clean(unsigned tid) const;
void Resize(ClockCache *c, uptr nclk);
void Reset(ClockCache *c);
void DebugDump(int(*printf)(const char *s, ...));
+ // Clock element iterator.
+ // Note: it iterates only over the table without regard to dirty entries.
+ class Iter {
+ public:
+ explicit Iter(SyncClock* parent);
+ Iter& operator++();
+ bool operator!=(const Iter& other);
+ ClockElem &operator*();
+
+ private:
+ SyncClock *parent_;
+ // [pos_, end_) is the current continuous range of clock elements.
+ ClockElem *pos_;
+ ClockElem *end_;
+ int block_; // Current number of second level block.
+
+ NOINLINE void Next();
+ };
+
+ Iter begin();
+ Iter end();
+
private:
- friend struct ThreadClock;
+ friend class ThreadClock;
+ friend class Iter;
static const uptr kDirtyTids = 2;
+ struct Dirty {
+ u64 epoch : kClkBits;
+ u64 tid : 64 - kClkBits; // kInvalidId if not active
+ };
+
unsigned release_store_tid_;
unsigned release_store_reused_;
- unsigned dirty_tids_[kDirtyTids];
- // tab_ contains indirect pointer to a 512b block using DenseSlabAlloc.
- // If size_ <= 64, then tab_ points to an array with 64 ClockElem's.
- // Otherwise, tab_ points to an array with 128 u32 elements,
+ Dirty dirty_[kDirtyTids];
+ // If size_ is 0, tab_ is nullptr.
+ // If size <= 64 (kClockCount), tab_ contains pointer to an array with
+ // 64 ClockElem's (ClockBlock::clock).
+ // Otherwise, tab_ points to an array with up to 127 u32 elements,
// each pointing to the second-level 512b block with 64 ClockElem's.
+ // Unused space in the first level ClockBlock is used to store additional
+ // clock elements.
+ // The last u32 element in the first level ClockBlock is always used as
+ // reference counter.
+ //
+ // See the following scheme for details.
+ // All memory blocks are 512 bytes (allocated from ClockAlloc).
+ // Clock (clk) elements are 64 bits.
+ // Idx and ref are 32 bits.
+ //
+ // tab_
+ // |
+ // \/
+ // +----------------------------------------------------+
+ // | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
+ // +----------------------------------------------------+
+ // | |
+ // | \/
+ // | +----------------+
+ // | | clk0 ... clk63 |
+ // | +----------------+
+ // \/
+ // +------------------+
+ // | clk64 ... clk127 |
+ // +------------------+
+ //
+ // Note: dirty entries, if active, always override what's stored in the clock.
ClockBlock *tab_;
u32 tab_idx_;
- u32 size_;
+ u16 size_;
+ u16 blocks_; // Number of second level blocks.
+ void Unshare(ClockCache *c);
+ bool IsShared() const;
+ bool Cachable() const;
void ResetImpl();
+ void FlushDirty();
+ uptr capacity() const;
+ u32 get_block(uptr bi) const;
+ void append_block(u32 idx);
ClockElem &elem(unsigned tid) const;
};
// The clock that lives in threads.
-struct ThreadClock {
+class ThreadClock {
public:
typedef DenseSlabAllocCache Cache;
explicit ThreadClock(unsigned tid, unsigned reused = 0);
- u64 get(unsigned tid) const {
- DCHECK_LT(tid, kMaxTidInClock);
- return clk_[tid].epoch;
- }
-
+ u64 get(unsigned tid) const;
void set(ClockCache *c, unsigned tid, u64 v);
+ void set(u64 v);
+ void tick();
+ uptr size() const;
- void set(u64 v) {
- DCHECK_GE(v, clk_[tid_].epoch);
- clk_[tid_].epoch = v;
- }
-
- void tick() {
- clk_[tid_].epoch++;
- }
-
- uptr size() const {
- return nclk_;
- }
-
- void acquire(ClockCache *c, const SyncClock *src);
- void release(ClockCache *c, SyncClock *dst) const;
+ void acquire(ClockCache *c, SyncClock *src);
+ void release(ClockCache *c, SyncClock *dst);
void acq_rel(ClockCache *c, SyncClock *dst);
- void ReleaseStore(ClockCache *c, SyncClock *dst) const;
+ void ReleaseStore(ClockCache *c, SyncClock *dst);
void ResetCached(ClockCache *c);
void DebugReset();
@@ -116,16 +145,82 @@ struct ThreadClock {
private:
static const uptr kDirtyTids = SyncClock::kDirtyTids;
+ // Index of the thread associated with he clock ("current thread").
const unsigned tid_;
- const unsigned reused_;
+ const unsigned reused_; // tid_ reuse count.
+ // Current thread time when it acquired something from other threads.
u64 last_acquire_;
+
+ // Cached SyncClock (without dirty entries and release_store_tid_).
+ // We reuse it for subsequent store-release operations without intervening
+ // acquire operations. Since it is shared (and thus constant), clock value
+ // for the current thread is then stored in dirty entries in the SyncClock.
+ // We host a refernece to the table while it is cached here.
+ u32 cached_idx_;
+ u16 cached_size_;
+ u16 cached_blocks_;
+
+ // Number of active elements in the clk_ table (the rest is zeros).
uptr nclk_;
- ClockElem clk_[kMaxTidInClock];
+ u64 clk_[kMaxTidInClock]; // Fixed size vector clock.
bool IsAlreadyAcquired(const SyncClock *src) const;
- void UpdateCurrentThread(SyncClock *dst) const;
+ void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
+ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
+ DCHECK_LT(tid, kMaxTidInClock);
+ return clk_[tid];
+}
+
+ALWAYS_INLINE void ThreadClock::set(u64 v) {
+ DCHECK_GE(v, clk_[tid_]);
+ clk_[tid_] = v;
+}
+
+ALWAYS_INLINE void ThreadClock::tick() {
+ clk_[tid_]++;
+}
+
+ALWAYS_INLINE uptr ThreadClock::size() const {
+ return nclk_;
+}
+
+ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
+ return Iter(this);
+}
+
+ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
+ return Iter(nullptr);
+}
+
+ALWAYS_INLINE uptr SyncClock::size() const {
+ return size_;
+}
+
+ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
+ : parent_(parent)
+ , pos_(nullptr)
+ , end_(nullptr)
+ , block_(-1) {
+ if (parent)
+ Next();
+}
+
+ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
+ pos_++;
+ if (UNLIKELY(pos_ >= end_))
+ Next();
+ return *this;
+}
+
+ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
+ return parent_ != other.parent_;
+}
+
+ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
+ return *pos_;
+}
} // namespace __tsan
#endif // TSAN_CLOCK_H
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 8977fea7c552..3c775debfb09 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -38,15 +38,40 @@
namespace __tsan {
+const int kClkBits = 42;
+const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
+
+struct ClockElem {
+ u64 epoch : kClkBits;
+ u64 reused : 64 - kClkBits; // tid reuse count
+};
+
+struct ClockBlock {
+ static const uptr kSize = 512;
+ static const uptr kTableSize = kSize / sizeof(u32);
+ static const uptr kClockCount = kSize / sizeof(ClockElem);
+ static const uptr kRefIdx = kTableSize - 1;
+ static const uptr kBlockIdx = kTableSize - 2;
+
+ union {
+ u32 table[kTableSize];
+ ClockElem clock[kClockCount];
+ };
+
+ ClockBlock() {
+ }
+};
+
const int kTidBits = 13;
-const unsigned kMaxTid = 1 << kTidBits;
+// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is
+// occupied by reference counter, so total number of elements we can store
+// in SyncClock is kClockCount * (kTableSize - 1).
+const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;
#if !SANITIZER_GO
const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit.
#else
const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory.
#endif
-const int kClkBits = 42;
-const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
const uptr kShadowStackSize = 64 * 1024;
// Count of shadow values in a shadow cell.
@@ -74,7 +99,7 @@ const bool kCollectHistory = false;
const bool kCollectHistory = true;
#endif
-const unsigned kInvalidTid = (unsigned)-1;
+const u16 kInvalidTid = kMaxTid + 1;
// The following "build consistency" machinery ensures that all source files
// are built in the same configuration. Inconsistent builds lead to
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index 1434cf688ce9..f79dccddba9f 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -10,6 +10,7 @@
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_placement_new.h"
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 0ba01babe69a..ead1e5704989 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -286,7 +286,7 @@ void InitializePlatform() {
int ExtractResolvFDs(void *state, int *fds, int nfd) {
#if SANITIZER_LINUX && !SANITIZER_ANDROID
int cnt = 0;
- __res_state *statp = (__res_state*)state;
+ struct __res_state *statp = (struct __res_state*)state;
for (int i = 0; i < MAXNS && cnt < nfd; i++) {
if (statp->_u._ext.nsaddrs[i] && statp->_u._ext.nssocks[i] != -1)
fds[cnt++] = statp->_u._ext.nssocks[i];
diff --git a/lib/tsan/tests/unit/tsan_clock_test.cc b/lib/tsan/tests/unit/tsan_clock_test.cc
index 73104dd6b9d4..f6230e1be565 100644
--- a/lib/tsan/tests/unit/tsan_clock_test.cc
+++ b/lib/tsan/tests/unit/tsan_clock_test.cc
@@ -53,6 +53,31 @@ TEST(Clock, ChunkedBasic) {
chunked.Reset(&cache);
}
+static const uptr interesting_sizes[] = {0, 1, 2, 30, 61, 62, 63, 64, 65, 66,
+ 100, 124, 125, 126, 127, 128, 129, 130, 188, 189, 190, 191, 192, 193, 254,
+ 255};
+
+TEST(Clock, Iter) {
+ const uptr n = ARRAY_SIZE(interesting_sizes);
+ for (uptr fi = 0; fi < n; fi++) {
+ const uptr size = interesting_sizes[fi];
+ SyncClock sync;
+ ThreadClock vector(0);
+ for (uptr i = 0; i < size; i++)
+ vector.set(&cache, i, i + 1);
+ if (size != 0)
+ vector.release(&cache, &sync);
+ uptr i = 0;
+ for (ClockElem &ce : sync) {
+ ASSERT_LT(i, size);
+ ASSERT_EQ(sync.get_clean(i), ce.epoch);
+ i++;
+ }
+ ASSERT_EQ(i, size);
+ sync.Reset(&cache);
+ }
+}
+
TEST(Clock, AcquireRelease) {
ThreadClock vector1(100);
vector1.tick();
@@ -216,13 +241,11 @@ TEST(Clock, Growth) {
TEST(Clock, Growth2) {
// Test clock growth for every pair of sizes:
- const uptr sizes[] = {0, 1, 2, 30, 61, 62, 63, 64, 65, 66, 100, 124, 125, 126,
- 127, 128, 129, 130, 188, 189, 190, 191, 192, 193, 254, 255};
- const uptr n = sizeof(sizes) / sizeof(sizes[0]);
+ const uptr n = ARRAY_SIZE(interesting_sizes);
for (uptr fi = 0; fi < n; fi++) {
for (uptr ti = fi + 1; ti < n; ti++) {
- const uptr from = sizes[fi];
- const uptr to = sizes[ti];
+ const uptr from = interesting_sizes[fi];
+ const uptr to = interesting_sizes[ti];
SyncClock sync;
ThreadClock vector(0);
for (uptr i = 0; i < from; i++)