src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-07-19 07:02:40 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-07-19 07:02:40 +0000
commit	462d72ec21756f5ca1002f128f014d36a845f3d2 (patch)
tree	b3d61e87f3a1b79e57d9639497994df822687bf0 /lib/tsan/rtl/tsan_clock.cc
parent	1992b790c2c12b7850bdf86662b67302052ec2fe (diff)
download	src-462d72ec21756f5ca1002f128f014d36a845f3d2.tar.gz src-462d72ec21756f5ca1002f128f014d36a845f3d2.zip

Vendor import of compiler-rt trunk r308421 (tags: vendor/compiler-rt/compiler-rt-trunk-r308421, vendor/compiler-rt/compiler-rt-release_50-r311219, vendor/compiler-rt/compiler-rt-release_50-r310316, vendor/compiler-rt/compiler-rt-release_50-r309439)

https://llvm.org/svn/llvm-project/compiler-rt/trunk@308421

Notes

Notes: svn path=/vendor/compiler-rt/dist/; revision=321188 svn path=/vendor/compiler-rt/compiler-rt-release_50-r311219/; revision=322731; tag=vendor/compiler-rt/compiler-rt-release_50-r311219

Diffstat (limited to 'lib/tsan/rtl/tsan_clock.cc')

-rw-r--r--

lib/tsan/rtl/tsan_clock.cc

436

1 files changed, 304 insertions, 132 deletions

diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 9ee9104283f8..ef984a45cd9d 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc

@@ -61,20 +61,13 @@

// an exclusive lock; ThreadClock's are private to respective threads and so

// do not need any protection.

-// Description of ThreadClock state:

-// clk_ - fixed size vector clock.

-// nclk_ - effective size of the vector clock (the rest is zeros).

-// tid_ - index of the thread associated with he clock ("current thread").

-// last_acquire_ - current thread time when it acquired something from

-// other threads.

-//

// Description of SyncClock state:

// clk_ - variable size vector clock, low kClkBits hold timestamp,

// the remaining bits hold "acquired" flag (the actual value is thread's

// reused counter);

// if acquired == thr->reused_, then the respective thread has already

-// acquired this clock (except possibly dirty_tids_).

-// dirty_tids_ - holds up to two indeces in the vector clock that other threads

+// acquired this clock (except possibly for dirty elements).

+// dirty_ - holds up to two indices in the vector clock that other threads

// need to acquire regardless of "acquired" flag value;

// release_store_tid_ - denotes that the clock state is a result of

// release-store operation by the thread with release_store_tid_ index.

@@ -90,21 +83,51 @@

namespace __tsan {

+static atomic_uint32_t *ref_ptr(ClockBlock *cb) {

+ return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);

+// Drop reference to the first level block idx.

+static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {

+ ClockBlock *cb = ctx->clock_alloc.Map(idx);

+ atomic_uint32_t *ref = ref_ptr(cb);

+ u32 v = atomic_load(ref, memory_order_acquire);

+ for (;;) {

+ CHECK_GT(v, 0);

+ if (v == 1)

+ break;

+ if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))

+ return;

+ }

+ // First level block owns second level blocks, so free them as well.

+ for (uptr i = 0; i < blocks; i++)

+ ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);

+ ctx->clock_alloc.Free(c, idx);

ThreadClock::ThreadClock(unsigned tid, unsigned reused)

: tid_(tid)

- , reused_(reused + 1) { // 0 has special meaning

+ , reused_(reused + 1) // 0 has special meaning

+ , cached_idx_()

+ , cached_size_()

+ , cached_blocks_() {

CHECK_LT(tid, kMaxTidInClock);

CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);

nclk_ = tid_ + 1;

last_acquire_ = 0;

internal_memset(clk_, 0, sizeof(clk_));

- clk_[tid_].reused = reused_;

}

void ThreadClock::ResetCached(ClockCache *c) {

+ if (cached_idx_) {

+ UnrefClockBlock(c, cached_idx_, cached_blocks_);

+ cached_idx_ = 0;

+ cached_size_ = 0;

+ cached_blocks_ = 0;

+ }

}

-void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {

+void ThreadClock::acquire(ClockCache *c, SyncClock *src) {

DCHECK_LE(nclk_, kMaxTid);

DCHECK_LE(src->size_, kMaxTid);

CPP_STAT_INC(StatClockAcquire);

@@ -116,50 +139,46 @@ void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {

return;

}

- // Check if we've already acquired src after the last release operation on src

bool acquired = false;

- if (nclk > tid_) {

- if (src->elem(tid_).reused == reused_) {

- for (unsigned i = 0; i < kDirtyTids; i++) {

- unsigned tid = src->dirty_tids_[i];

- if (tid != kInvalidTid) {

- u64 epoch = src->elem(tid).epoch;

- if (clk_[tid].epoch < epoch) {

- clk_[tid].epoch = epoch;

- acquired = true;

- }

- if (acquired) {

- CPP_STAT_INC(StatClockAcquiredSomething);

- last_acquire_ = clk_[tid_].epoch;

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ SyncClock::Dirty dirty = src->dirty_[i];

+ unsigned tid = dirty.tid;

+ if (tid != kInvalidTid) {

+ if (clk_[tid] < dirty.epoch) {

+ clk_[tid] = dirty.epoch;

+ acquired = true;

}

- return;

}

- // O(N) acquire.

- CPP_STAT_INC(StatClockAcquireFull);

- nclk_ = max(nclk_, nclk);

- for (uptr i = 0; i < nclk; i++) {

- u64 epoch = src->elem(i).epoch;

- if (clk_[i].epoch < epoch) {

- clk_[i].epoch = epoch;

- acquired = true;

+ // Check if we've already acquired src after the last release operation on src

+ if (tid_ >= nclk || src->elem(tid_).reused != reused_) {

+ // O(N) acquire.

+ CPP_STAT_INC(StatClockAcquireFull);

+ nclk_ = max(nclk_, nclk);

+ u64 *dst_pos = &clk_[0];

+ for (ClockElem &src_elem : *src) {

+ u64 epoch = src_elem.epoch;

+ if (*dst_pos < epoch) {

+ *dst_pos = epoch;

+ acquired = true;

+ }

+ dst_pos++;

}

- }

- // Remember that this thread has acquired this clock.

- if (nclk > tid_)

- src->elem(tid_).reused = reused_;

+ // Remember that this thread has acquired this clock.

+ if (nclk > tid_)

+ src->elem(tid_).reused = reused_;

+ }

if (acquired) {

CPP_STAT_INC(StatClockAcquiredSomething);

- last_acquire_ = clk_[tid_].epoch;

+ last_acquire_ = clk_[tid_];

+ ResetCached(c);

}

-void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

+void ThreadClock::release(ClockCache *c, SyncClock *dst) {

DCHECK_LE(nclk_, kMaxTid);

DCHECK_LE(dst->size_, kMaxTid);

@@ -179,7 +198,7 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

// since the last release on dst. If so, we need to update

// only dst->elem(tid_).

if (dst->elem(tid_).epoch > last_acquire_) {

- UpdateCurrentThread(dst);

+ UpdateCurrentThread(c, dst);

if (dst->release_store_tid_ != tid_ ||

dst->release_store_reused_ != reused_)

dst->release_store_tid_ = kInvalidTid;

@@ -188,23 +207,24 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

// O(N) release.

CPP_STAT_INC(StatClockReleaseFull);

+ dst->Unshare(c);

// First, remember whether we've acquired dst.

bool acquired = IsAlreadyAcquired(dst);

if (acquired)

CPP_STAT_INC(StatClockReleaseAcquired);

// Update dst->clk_.

- for (uptr i = 0; i < nclk_; i++) {

- ClockElem &ce = dst->elem(i);

- ce.epoch = max(ce.epoch, clk_[i].epoch);

+ dst->FlushDirty();

+ uptr i = 0;

+ for (ClockElem &ce : *dst) {

+ ce.epoch = max(ce.epoch, clk_[i]);

ce.reused = 0;

+ i++;

}

// Clear 'acquired' flag in the remaining elements.

if (nclk_ < dst->size_)

CPP_STAT_INC(StatClockReleaseClearTail);

for (uptr i = nclk_; i < dst->size_; i++)

dst->elem(i).reused = 0;

- for (unsigned i = 0; i < kDirtyTids; i++)

- dst->dirty_tids_[i] = kInvalidTid;

dst->release_store_tid_ = kInvalidTid;

dst->release_store_reused_ = 0;

// If we've acquired dst, remember this fact,

@@ -213,11 +233,37 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {

dst->elem(tid_).reused = reused_;

}

-void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {

+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {

DCHECK_LE(nclk_, kMaxTid);

DCHECK_LE(dst->size_, kMaxTid);

CPP_STAT_INC(StatClockStore);

+ if (dst->size_ == 0 && cached_idx_ != 0) {

+ // Reuse the cached clock.

+ // Note: we could reuse/cache the cached clock in more cases:

+ // we could update the existing clock and cache it, or replace it with the

+ // currently cached clock and release the old one. And for a shared

+ // existing clock, we could replace it with the currently cached;

+ // or unshare, update and cache. But, for simplicity, we currently reuse

+ // cached clock only when the target clock is empty.

+ dst->tab_ = ctx->clock_alloc.Map(cached_idx_);

+ dst->tab_idx_ = cached_idx_;

+ dst->size_ = cached_size_;

+ dst->blocks_ = cached_blocks_;

+ CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);

+ // The cached clock is shared (immutable),

+ // so this is where we store the current clock.

+ dst->dirty_[0].tid = tid_;

+ dst->dirty_[0].epoch = clk_[tid_];

+ dst->release_store_tid_ = tid_;

+ dst->release_store_reused_ = reused_;

+ // Remember that we don't need to acquire it in future.

+ dst->elem(tid_).reused = reused_;

+ // Grab a reference.

+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);

+ return;

+ }

// Check if we need to resize dst.

if (dst->size_ < nclk_)

dst->Resize(c, nclk_);

@@ -226,32 +272,41 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {

dst->release_store_reused_ == reused_ &&

dst->elem(tid_).epoch > last_acquire_) {

CPP_STAT_INC(StatClockStoreFast);

- UpdateCurrentThread(dst);

+ UpdateCurrentThread(c, dst);

return;

}

// O(N) release-store.

CPP_STAT_INC(StatClockStoreFull);

- for (uptr i = 0; i < nclk_; i++) {

- ClockElem &ce = dst->elem(i);

- ce.epoch = clk_[i].epoch;

+ dst->Unshare(c);

+ // Note: dst can be larger than this ThreadClock.

+ // This is fine since clk_ beyond size is all zeros.

+ uptr i = 0;

+ for (ClockElem &ce : *dst) {

+ ce.epoch = clk_[i];

ce.reused = 0;

+ i++;

}

- // Clear the tail of dst->clk_.

- if (nclk_ < dst->size_) {

- for (uptr i = nclk_; i < dst->size_; i++) {

- ClockElem &ce = dst->elem(i);

- ce.epoch = 0;

- ce.reused = 0;

- }

- CPP_STAT_INC(StatClockStoreTail);

- }

- for (unsigned i = 0; i < kDirtyTids; i++)

- dst->dirty_tids_[i] = kInvalidTid;

+ for (uptr i = 0; i < kDirtyTids; i++)

+ dst->dirty_[i].tid = kInvalidTid;

dst->release_store_tid_ = tid_;

dst->release_store_reused_ = reused_;

// Remember that we don't need to acquire it in future.

dst->elem(tid_).reused = reused_;

+ // If the resulting clock is cachable, cache it for future release operations.

+ // The clock is always cachable if we released to an empty sync object.

+ if (cached_idx_ == 0 && dst->Cachable()) {

+ // Grab a reference to the ClockBlock.

+ atomic_uint32_t *ref = ref_ptr(dst->tab_);

+ if (atomic_load(ref, memory_order_acquire) == 1)

+ atomic_store_relaxed(ref, 2);

+ else

+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);

+ cached_idx_ = dst->tab_idx_;

+ cached_size_ = dst->size_;

+ cached_blocks_ = dst->blocks_;

+ }

}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {

@@ -261,37 +316,36 @@ void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {

}

// Updates only single element related to the current thread in dst->clk_.

-void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {

+void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {

// Update the threads time, but preserve 'acquired' flag.

- dst->elem(tid_).epoch = clk_[tid_].epoch;

for (unsigned i = 0; i < kDirtyTids; i++) {

- if (dst->dirty_tids_[i] == tid_) {

+ SyncClock::Dirty *dirty = &dst->dirty_[i];

+ const unsigned tid = dirty->tid;

+ if (tid == tid_ || tid == kInvalidTid) {

CPP_STAT_INC(StatClockReleaseFast);

- return;

- }

- if (dst->dirty_tids_[i] == kInvalidTid) {

- CPP_STAT_INC(StatClockReleaseFast);

- dst->dirty_tids_[i] = tid_;

+ dirty->tid = tid_;

+ dirty->epoch = clk_[tid_];

return;

}

// Reset all 'acquired' flags, O(N).

+ // We are going to touch dst elements, so we need to unshare it.

+ dst->Unshare(c);

CPP_STAT_INC(StatClockReleaseSlow);

+ dst->elem(tid_).epoch = clk_[tid_];

for (uptr i = 0; i < dst->size_; i++)

dst->elem(i).reused = 0;

- for (unsigned i = 0; i < kDirtyTids; i++)

- dst->dirty_tids_[i] = kInvalidTid;

+ dst->FlushDirty();

}

-// Checks whether the current threads has already acquired src.

+// Checks whether the current thread has already acquired src.

bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {

if (src->elem(tid_).reused != reused_)

return false;

for (unsigned i = 0; i < kDirtyTids; i++) {

- unsigned tid = src->dirty_tids_[i];

- if (tid != kInvalidTid) {

- if (clk_[tid].epoch < src->elem(tid).epoch)

+ SyncClock::Dirty dirty = src->dirty_[i];

+ if (dirty.tid != kInvalidTid) {

+ if (clk_[dirty.tid] < dirty.epoch)

return false;

}

@@ -302,22 +356,19 @@ bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {

// This function is called only from weird places like AcquireGlobal.

void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {

DCHECK_LT(tid, kMaxTid);

- DCHECK_GE(v, clk_[tid].epoch);

- clk_[tid].epoch = v;

+ DCHECK_GE(v, clk_[tid]);

+ clk_[tid] = v;

if (nclk_ <= tid)

nclk_ = tid + 1;

- last_acquire_ = clk_[tid_].epoch;

+ last_acquire_ = clk_[tid_];

+ ResetCached(c);

}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {

printf("clock=[");

for (uptr i = 0; i < nclk_; i++)

- printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);

- printf("] reused=[");

- for (uptr i = 0; i < nclk_; i++)

- printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);

- printf("] tid=%u/%u last_acq=%llu",

- tid_, reused_, last_acquire_);

+ printf("%s%llu", i == 0 ? "" : ",", clk_[i]);

+ printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);

}

SyncClock::SyncClock() {

@@ -327,22 +378,14 @@ SyncClock::SyncClock() {

SyncClock::~SyncClock() {

// Reset must be called before dtor.

CHECK_EQ(size_, 0);

+ CHECK_EQ(blocks_, 0);

CHECK_EQ(tab_, 0);

CHECK_EQ(tab_idx_, 0);

}

void SyncClock::Reset(ClockCache *c) {

- if (size_ == 0) {

- // nothing

- } else if (size_ <= ClockBlock::kClockCount) {

- // One-level table.

- ctx->clock_alloc.Free(c, tab_idx_);

- } else {

- // Two-level table.

- for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)

- ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);

- ctx->clock_alloc.Free(c, tab_idx_);

- }

+ if (size_)

+ UnrefClockBlock(c, tab_idx_, blocks_);

ResetImpl();

}

@@ -350,66 +393,171 @@ void SyncClock::ResetImpl() {

tab_ = 0;

tab_idx_ = 0;

size_ = 0;

+ blocks_ = 0;

release_store_tid_ = kInvalidTid;

release_store_reused_ = 0;

for (uptr i = 0; i < kDirtyTids; i++)

- dirty_tids_[i] = kInvalidTid;

+ dirty_[i].tid = kInvalidTid;

}

void SyncClock::Resize(ClockCache *c, uptr nclk) {

CPP_STAT_INC(StatClockReleaseResize);

- if (RoundUpTo(nclk, ClockBlock::kClockCount) <=

- RoundUpTo(size_, ClockBlock::kClockCount)) {

- // Growing within the same block.

+ Unshare(c);

+ if (nclk <= capacity()) {

// Memory is already allocated, just increase the size.

size_ = nclk;

return;

}

- if (nclk <= ClockBlock::kClockCount) {

+ if (size_ == 0) {

// Grow from 0 to one-level table.

CHECK_EQ(size_, 0);

+ CHECK_EQ(blocks_, 0);

CHECK_EQ(tab_, 0);

CHECK_EQ(tab_idx_, 0);

- size_ = nclk;

- tab_idx_ = ctx->clock_alloc.Alloc(c);

- tab_ = ctx->clock_alloc.Map(tab_idx_);

- internal_memset(tab_, 0, sizeof(*tab_));

- return;

- }

- // Growing two-level table.

- if (size_ == 0) {

- // Allocate first level table.

- tab_idx_ = ctx->clock_alloc.Alloc(c);

- tab_ = ctx->clock_alloc.Map(tab_idx_);

- internal_memset(tab_, 0, sizeof(*tab_));

- } else if (size_ <= ClockBlock::kClockCount) {

- // Transform one-level table to two-level table.

- u32 old = tab_idx_;

tab_idx_ = ctx->clock_alloc.Alloc(c);

tab_ = ctx->clock_alloc.Map(tab_idx_);

internal_memset(tab_, 0, sizeof(*tab_));

- tab_->table[0] = old;

+ atomic_store_relaxed(ref_ptr(tab_), 1);

+ size_ = 1;

+ } else if (size_ > blocks_ * ClockBlock::kClockCount) {

+ u32 idx = ctx->clock_alloc.Alloc(c);

+ ClockBlock *new_cb = ctx->clock_alloc.Map(idx);

+ uptr top = size_ - blocks_ * ClockBlock::kClockCount;

+ CHECK_LT(top, ClockBlock::kClockCount);

+ const uptr move = top * sizeof(tab_->clock[0]);

+ internal_memcpy(&new_cb->clock[0], tab_->clock, move);

+ internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);

+ internal_memset(tab_->clock, 0, move);

+ append_block(idx);

}

- // At this point we have first level table allocated.

+ // At this point we have first level table allocated and all clock elements

+ // are evacuated from it to a second level block.

// Add second level tables as necessary.

- for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);

- i < nclk; i += ClockBlock::kClockCount) {

+ while (nclk > capacity()) {

u32 idx = ctx->clock_alloc.Alloc(c);

ClockBlock *cb = ctx->clock_alloc.Map(idx);

internal_memset(cb, 0, sizeof(*cb));

- CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);

- tab_->table[i/ClockBlock::kClockCount] = idx;

+ append_block(idx);

}

size_ = nclk;

}

-ClockElem &SyncClock::elem(unsigned tid) const {

+// Flushes all dirty elements into the main clock array.

+void SyncClock::FlushDirty() {

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ Dirty *dirty = &dirty_[i];

+ if (dirty->tid != kInvalidTid) {

+ CHECK_LT(dirty->tid, size_);

+ elem(dirty->tid).epoch = dirty->epoch;

+ dirty->tid = kInvalidTid;

+ }

+bool SyncClock::IsShared() const {

+ if (size_ == 0)

+ return false;

+ atomic_uint32_t *ref = ref_ptr(tab_);

+ u32 v = atomic_load(ref, memory_order_acquire);

+ CHECK_GT(v, 0);

+ return v > 1;

+// Unshares the current clock if it's shared.

+// Shared clocks are immutable, so they need to be unshared before any updates.

+// Note: this does not apply to dirty entries as they are not shared.

+void SyncClock::Unshare(ClockCache *c) {

+ if (!IsShared())

+ return;

+ // First, copy current state into old.

+ SyncClock old;

+ old.tab_ = tab_;

+ old.tab_idx_ = tab_idx_;

+ old.size_ = size_;

+ old.blocks_ = blocks_;

+ old.release_store_tid_ = release_store_tid_;

+ old.release_store_reused_ = release_store_reused_;

+ for (unsigned i = 0; i < kDirtyTids; i++)

+ old.dirty_[i] = dirty_[i];

+ // Then, clear current object.

+ ResetImpl();

+ // Allocate brand new clock in the current object.

+ Resize(c, old.size_);

+ // Now copy state back into this object.

+ Iter old_iter(&old);

+ for (ClockElem &ce : *this) {

+ ce = *old_iter;

+ ++old_iter;

+ }

+ release_store_tid_ = old.release_store_tid_;

+ release_store_reused_ = old.release_store_reused_;

+ for (unsigned i = 0; i < kDirtyTids; i++)

+ dirty_[i] = old.dirty_[i];

+ // Drop reference to old and delete if necessary.

+ old.Reset(c);

+// Can we cache this clock for future release operations?

+ALWAYS_INLINE bool SyncClock::Cachable() const {

+ if (size_ == 0)

+ return false;

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ if (dirty_[i].tid != kInvalidTid)

+ return false;

+ }

+ return atomic_load_relaxed(ref_ptr(tab_)) == 1;

+// elem linearizes the two-level structure into linear array.

+// Note: this is used only for one time accesses, vector operations use

+// the iterator as it is much faster.

+ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {

DCHECK_LT(tid, size_);

- if (size_ <= ClockBlock::kClockCount)

+ const uptr block = tid / ClockBlock::kClockCount;

+ DCHECK_LE(block, blocks_);

+ tid %= ClockBlock::kClockCount;

+ if (block == blocks_)

return tab_->clock[tid];

- u32 idx = tab_->table[tid / ClockBlock::kClockCount];

+ u32 idx = get_block(block);

ClockBlock *cb = ctx->clock_alloc.Map(idx);

- return cb->clock[tid % ClockBlock::kClockCount];

+ return cb->clock[tid];

+ALWAYS_INLINE uptr SyncClock::capacity() const {

+ if (size_ == 0)

+ return 0;

+ uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);

+ // How many clock elements we can fit into the first level block.

+ // +1 for ref counter.

+ uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;

+ return blocks_ * ClockBlock::kClockCount + top;

+ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {

+ DCHECK(size_);

+ DCHECK_LT(bi, blocks_);

+ return tab_->table[ClockBlock::kBlockIdx - bi];

+ALWAYS_INLINE void SyncClock::append_block(u32 idx) {

+ uptr bi = blocks_++;

+ CHECK_EQ(get_block(bi), 0);

+ tab_->table[ClockBlock::kBlockIdx - bi] = idx;

+// Used only by tests.

+u64 SyncClock::get(unsigned tid) const {

+ for (unsigned i = 0; i < kDirtyTids; i++) {

+ Dirty dirty = dirty_[i];

+ if (dirty.tid == tid)

+ return dirty.epoch;

+ }

+ return elem(tid).epoch;

+// Used only by Iter test.

+u64 SyncClock::get_clean(unsigned tid) const {

+ return elem(tid).epoch;

}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {

@@ -419,8 +567,32 @@ void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {

printf("] reused=[");

for (uptr i = 0; i < size_; i++)

printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);

- printf("] release_store_tid=%d/%d dirty_tids=%d/%d",

+ printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",

release_store_tid_, release_store_reused_,

- dirty_tids_[0], dirty_tids_[1]);

+ dirty_[0].tid, dirty_[0].epoch,

+ dirty_[1].tid, dirty_[1].epoch);

+void SyncClock::Iter::Next() {

+ // Finished with the current block, move on to the next one.

+ block_++;

+ if (block_ < parent_->blocks_) {

+ // Iterate over the next second level block.

+ u32 idx = parent_->get_block(block_);

+ ClockBlock *cb = ctx->clock_alloc.Map(idx);

+ pos_ = &cb->clock[0];

+ end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,

+ ClockBlock::kClockCount);

+ return;

+ }

+ if (block_ == parent_->blocks_ &&

+ parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {

+ // Iterate over elements in the first level block.

+ pos_ = &parent_->tab_->clock[0];

+ end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,

+ ClockBlock::kClockCount);

+ return;

+ }

+ parent_ = nullptr; // denotes end

}

} // namespace __tsan