aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorOlivier Houchard <cognet@FreeBSD.org>2018-04-02 23:35:32 +0000
committerOlivier Houchard <cognet@FreeBSD.org>2018-04-02 23:35:32 +0000
commite8d27288c2439ee79c81e4684ea90e8ca1aab845 (patch)
tree167d58dad3c007074e191f90478c1824dfe32aa5 /include
parent358370410ca01ec37e55ea44c0f3cd1fa8860c5d (diff)
downloadsrc-e8d27288c2439ee79c81e4684ea90e8ca1aab845.tar.gz
src-e8d27288c2439ee79c81e4684ea90e8ca1aab845.zip
Import CK as of commit b19ed4c6a56ec93215ab567ba18ba61bf1cfbac8vendor/ck/20180304
It should fix ck_pr_[load|store]_ptr on mips and riscv, make sure no *fence instructions are used on i386, as older cpus don't support it, and make sure we don't rely on gcc builtins that can lead to calls to libatomic when linked with -O0.
Notes
Notes: svn path=/vendor-sys/ck/dist/; revision=331895 svn path=/vendor-sys/ck/20180304/; revision=331896; tag=vendor/ck/20180304
Diffstat (limited to 'include')
-rw-r--r--include/ck_cc.h55
-rw-r--r--include/ck_hs.h4
-rw-r--r--include/ck_pr.h2
-rw-r--r--include/ck_queue.h2
-rw-r--r--include/ck_ring.h51
-rw-r--r--include/gcc/ck_cc.h19
-rw-r--r--include/gcc/ck_pr.h4
-rw-r--r--include/gcc/sparcv9/ck_pr.h2
-rw-r--r--include/gcc/x86/ck_pr.h64
-rw-r--r--include/gcc/x86_64/ck_pr.h35
-rw-r--r--include/spinlock/dec.h3
11 files changed, 154 insertions, 87 deletions
diff --git a/include/ck_cc.h b/include/ck_cc.h
index e17dc7b15f26..9a152a3cddab 100644
--- a/include/ck_cc.h
+++ b/include/ck_cc.h
@@ -104,41 +104,35 @@
#define CK_CC_TYPEOF(X, DEFAULT) (DEFAULT)
#endif
-#ifndef CK_F_CC_FFS
-#define CK_F_CC_FFS
-CK_CC_INLINE static int
-ck_cc_ffs(unsigned int x)
-{
- unsigned int i;
-
- if (x == 0)
- return 0;
-
- for (i = 1; (x & 1) == 0; i++, x >>= 1);
-
- return i;
+#define CK_F_CC_FFS_G(L, T) \
+CK_CC_INLINE static int \
+ck_cc_##L(T v) \
+{ \
+ unsigned int i; \
+ \
+ if (v == 0) \
+ return 0; \
+ \
+ for (i = 1; (v & 1) == 0; i++, v >>= 1); \
+ return i; \
}
-#endif
-
-#ifndef CK_F_CC_CLZ
-#define CK_F_CC_CLZ
-#include <ck_limits.h>
-CK_CC_INLINE static int
-ck_cc_clz(unsigned int x)
-{
- unsigned int count, i;
+#ifndef CK_F_CC_FFS
+#define CK_F_CC_FFS
+CK_F_CC_FFS_G(ffs, unsigned int)
+#endif /* CK_F_CC_FFS */
- for (count = 0, i = sizeof(unsigned int) * CHAR_BIT; i > 0; count++) {
- unsigned int bit = 1U << --i;
+#ifndef CK_F_CC_FFSL
+#define CK_F_CC_FFSL
+CK_F_CC_FFS_G(ffsl, unsigned long)
+#endif /* CK_F_CC_FFSL */
- if (x & bit)
- break;
- }
+#ifndef CK_F_CC_FFSLL
+#define CK_F_CC_FFSLL
+CK_F_CC_FFS_G(ffsll, unsigned long long)
+#endif /* CK_F_CC_FFSLL */
- return count;
-}
-#endif
+#undef CK_F_CC_FFS_G
#ifndef CK_F_CC_CTZ
#define CK_F_CC_CTZ
@@ -151,7 +145,6 @@ ck_cc_ctz(unsigned int x)
return 0;
for (i = 0; (x & 1) == 0; i++, x >>= 1);
-
return i;
}
#endif
diff --git a/include/ck_hs.h b/include/ck_hs.h
index b3eb04698f73..3c12b6e602a7 100644
--- a/include/ck_hs.h
+++ b/include/ck_hs.h
@@ -100,10 +100,11 @@ struct ck_hs_stat {
struct ck_hs_iterator {
void **cursor;
unsigned long offset;
+ struct ck_hs_map *map;
};
typedef struct ck_hs_iterator ck_hs_iterator_t;
-#define CK_HS_ITERATOR_INITIALIZER { NULL, 0 }
+#define CK_HS_ITERATOR_INITIALIZER { NULL, 0, NULL }
/* Convenience wrapper to table hash function. */
#define CK_HS_HASH(T, F, K) F((K), (T)->seed)
@@ -112,6 +113,7 @@ typedef void *ck_hs_apply_fn_t(void *, void *);
bool ck_hs_apply(ck_hs_t *, unsigned long, const void *, ck_hs_apply_fn_t *, void *);
void ck_hs_iterator_init(ck_hs_iterator_t *);
bool ck_hs_next(ck_hs_t *, ck_hs_iterator_t *, void **);
+bool ck_hs_next_spmc(ck_hs_t *, ck_hs_iterator_t *, void **);
bool ck_hs_move(ck_hs_t *, ck_hs_t *, ck_hs_hash_cb_t *,
ck_hs_compare_cb_t *, struct ck_malloc *);
bool ck_hs_init(ck_hs_t *, unsigned int, ck_hs_hash_cb_t *,
diff --git a/include/ck_pr.h b/include/ck_pr.h
index 9b7fc42e99b7..4fdbdffa1110 100644
--- a/include/ck_pr.h
+++ b/include/ck_pr.h
@@ -43,6 +43,8 @@
#include "gcc/sparcv9/ck_pr.h"
#elif defined(__ppc64__)
#include "gcc/ppc64/ck_pr.h"
+#elif defined(__s390x__)
+#include "gcc/s390x/ck_pr.h"
#elif defined(__ppc__)
#include "gcc/ppc/ck_pr.h"
#elif defined(__arm__)
diff --git a/include/ck_queue.h b/include/ck_queue.h
index 0b7ce1aec708..28b298e6d9a3 100644
--- a/include/ck_queue.h
+++ b/include/ck_queue.h
@@ -235,7 +235,7 @@ struct { \
* Singly-linked Tail queue functions.
*/
#define CK_STAILQ_CONCAT(head1, head2) do { \
- if ((head2)->stqh_first == NULL) { \
+ if ((head2)->stqh_first != NULL) { \
ck_pr_store_ptr((head1)->stqh_last, (head2)->stqh_first); \
ck_pr_fence_store(); \
(head1)->stqh_last = (head2)->stqh_last; \
diff --git a/include/ck_ring.h b/include/ck_ring.h
index 8a2a7913e124..e5f0712ef7cf 100644
--- a/include/ck_ring.h
+++ b/include/ck_ring.h
@@ -176,23 +176,54 @@ _ck_ring_enqueue_mp(struct ck_ring *ring,
producer = ck_pr_load_uint(&ring->p_head);
- do {
+ for (;;) {
/*
- * The snapshot of producer must be up to date with
- * respect to consumer.
+ * The snapshot of producer must be up to date with respect to
+ * consumer.
*/
ck_pr_fence_load();
consumer = ck_pr_load_uint(&ring->c_head);
delta = producer + 1;
- if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) {
- r = false;
- goto leave;
+
+ /*
+ * Only try to CAS if the producer is not clearly stale (not
+ * less than consumer) and the buffer is definitely not full.
+ */
+ if (CK_CC_LIKELY((producer - consumer) < mask)) {
+ if (ck_pr_cas_uint_value(&ring->p_head,
+ producer, delta, &producer) == true) {
+ break;
+ }
+ } else {
+ unsigned int new_producer;
+
+ /*
+ * Slow path. Either the buffer is full or we have a
+ * stale snapshot of p_head. Execute a second read of
+ * p_read that must be ordered wrt the snapshot of
+ * c_head.
+ */
+ ck_pr_fence_load();
+ new_producer = ck_pr_load_uint(&ring->p_head);
+
+ /*
+ * Only fail if we haven't made forward progress in
+ * production: the buffer must have been full when we
+ * read new_producer (or we wrapped around UINT_MAX
+ * during this iteration).
+ */
+ if (producer == new_producer) {
+ r = false;
+ goto leave;
+ }
+
+ /*
+ * p_head advanced during this iteration. Try again.
+ */
+ producer = new_producer;
}
- } while (ck_pr_cas_uint_value(&ring->p_head,
- producer,
- delta,
- &producer) == false);
+ }
buffer = (char *)buffer + ts * (producer & mask);
memcpy(buffer, entry, ts);
diff --git a/include/gcc/ck_cc.h b/include/gcc/ck_cc.h
index a14a4b51aa83..6ebc59cb5921 100644
--- a/include/gcc/ck_cc.h
+++ b/include/gcc/ck_cc.h
@@ -103,28 +103,26 @@
#define CK_CC_TYPEOF(X, DEFAULT) __typeof__(X)
/*
- * Portability wrappers for bitwise ops.
+ * Portability wrappers for bitwise operations.
*/
-
+#ifndef CK_MD_CC_BUILTIN_DISABLE
#define CK_F_CC_FFS
-#define CK_F_CC_CLZ
-#define CK_F_CC_CTZ
-#define CK_F_CC_POPCOUNT
-
CK_CC_INLINE static int
ck_cc_ffs(unsigned int x)
{
- return __builtin_ffs(x);
+ return __builtin_ffsl(x);
}
+#define CK_F_CC_FFSL
CK_CC_INLINE static int
-ck_cc_clz(unsigned int x)
+ck_cc_ffsl(unsigned long x)
{
- return __builtin_clz(x);
+ return __builtin_ffsll(x);
}
+#define CK_F_CC_CTZ
CK_CC_INLINE static int
ck_cc_ctz(unsigned int x)
{
@@ -132,11 +130,12 @@ ck_cc_ctz(unsigned int x)
return __builtin_ctz(x);
}
+#define CK_F_CC_POPCOUNT
CK_CC_INLINE static int
ck_cc_popcount(unsigned int x)
{
return __builtin_popcount(x);
}
-
+#endif /* CK_MD_CC_BUILTIN_DISABLE */
#endif /* CK_GCC_CC_H */
diff --git a/include/gcc/ck_pr.h b/include/gcc/ck_pr.h
index 084d4232d820..108e983a1e5c 100644
--- a/include/gcc/ck_pr.h
+++ b/include/gcc/ck_pr.h
@@ -80,7 +80,7 @@ ck_pr_md_load_ptr(const void *target)
void *r;
ck_pr_barrier();
- r = CK_CC_DECONST_PTR(CK_PR_ACCESS(target));
+ r = CK_CC_DECONST_PTR(*(volatile void *const*)(target));
ck_pr_barrier();
return r;
@@ -91,7 +91,7 @@ ck_pr_md_store_ptr(void *target, const void *v)
{
ck_pr_barrier();
- CK_PR_ACCESS(target) = CK_CC_DECONST_PTR(v);
+ *(volatile void **)target = CK_CC_DECONST_PTR(v);
ck_pr_barrier();
return;
}
diff --git a/include/gcc/sparcv9/ck_pr.h b/include/gcc/sparcv9/ck_pr.h
index 767af6a0268b..7dc7172557b6 100644
--- a/include/gcc/sparcv9/ck_pr.h
+++ b/include/gcc/sparcv9/ck_pr.h
@@ -76,7 +76,7 @@ CK_PR_FENCE(store, "membar #StoreStore")
CK_PR_FENCE(store_load, "membar #StoreLoad")
CK_PR_FENCE(load, "membar #LoadLoad")
CK_PR_FENCE(load_store, "membar #LoadStore")
-CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+CK_PR_FENCE(memory, "membar #MemIssue")
CK_PR_FENCE(acquire, "membar #LoadLoad | #LoadStore")
CK_PR_FENCE(release, "membar #LoadStore | #StoreStore")
CK_PR_FENCE(acqrel, "membar #LoadLoad | #LoadStore | #StoreStore")
diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h
index a04cebfd0337..3e36376fdd95 100644
--- a/include/gcc/x86/ck_pr.h
+++ b/include/gcc/x86/ck_pr.h
@@ -45,15 +45,9 @@
/* Minimum requirements for the CK_PR interface are met. */
#define CK_F_PR
-#ifdef CK_MD_UMP
-#define CK_PR_LOCK_PREFIX
-#else
-#define CK_PR_LOCK_PREFIX "lock "
-#endif
-
/*
- * Prevent speculative execution in busy-wait loops (P4 <=)
- * or "predefined delay".
+ * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
+ * delay".
*/
CK_CC_INLINE static void
ck_pr_stall(void)
@@ -62,28 +56,52 @@ ck_pr_stall(void)
return;
}
+#ifdef CK_MD_UMP
+#define CK_PR_LOCK_PREFIX
+#define CK_PR_FENCE(T, I) \
+ CK_CC_INLINE static void \
+ ck_pr_fence_strict_##T(void) \
+ { \
+ __asm__ __volatile__("" ::: "memory"); \
+ return; \
+ }
+#else
+#define CK_PR_LOCK_PREFIX "lock "
#define CK_PR_FENCE(T, I) \
CK_CC_INLINE static void \
ck_pr_fence_strict_##T(void) \
{ \
__asm__ __volatile__(I ::: "memory"); \
+ return; \
}
+#endif /* CK_MD_UMP */
-CK_PR_FENCE(atomic, "sfence")
-CK_PR_FENCE(atomic_store, "sfence")
-CK_PR_FENCE(atomic_load, "mfence")
-CK_PR_FENCE(store_atomic, "sfence")
-CK_PR_FENCE(load_atomic, "mfence")
-CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_store, "mfence")
-CK_PR_FENCE(store, "sfence")
-CK_PR_FENCE(store_load, "mfence")
-CK_PR_FENCE(memory, "mfence")
-CK_PR_FENCE(release, "mfence")
-CK_PR_FENCE(acquire, "mfence")
-CK_PR_FENCE(acqrel, "mfence")
-CK_PR_FENCE(lock, "mfence")
-CK_PR_FENCE(unlock, "mfence")
+#if defined(CK_MD_SSE_DISABLE)
+/* If SSE is disabled, then use atomic operations for serialization. */
+#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)"
+#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE
+#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE
+#else
+#define CK_MD_X86_SFENCE "sfence"
+#define CK_MD_X86_LFENCE "lfence"
+#define CK_MD_X86_MFENCE "mfence"
+#endif /* !CK_MD_SSE_DISABLE */
+
+CK_PR_FENCE(atomic, "")
+CK_PR_FENCE(atomic_store, "")
+CK_PR_FENCE(atomic_load, "")
+CK_PR_FENCE(store_atomic, "")
+CK_PR_FENCE(load_atomic, "")
+CK_PR_FENCE(load, CK_MD_X86_LFENCE)
+CK_PR_FENCE(load_store, CK_MD_X86_MFENCE)
+CK_PR_FENCE(store, CK_MD_X86_SFENCE)
+CK_PR_FENCE(store_load, CK_MD_X86_MFENCE)
+CK_PR_FENCE(memory, CK_MD_X86_MFENCE)
+CK_PR_FENCE(release, CK_MD_X86_MFENCE)
+CK_PR_FENCE(acquire, CK_MD_X86_MFENCE)
+CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE)
+CK_PR_FENCE(lock, CK_MD_X86_MFENCE)
+CK_PR_FENCE(unlock, CK_MD_X86_MFENCE)
#undef CK_PR_FENCE
diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h
index 532d593f277c..4de1332910ce 100644
--- a/include/gcc/x86_64/ck_pr.h
+++ b/include/gcc/x86_64/ck_pr.h
@@ -58,8 +58,8 @@
#endif
/*
- * Prevent speculative execution in busy-wait loops (P4 <=)
- * or "predefined delay".
+ * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
+ * delay".
*/
CK_CC_INLINE static void
ck_pr_stall(void)
@@ -75,18 +75,39 @@ ck_pr_stall(void)
__asm__ __volatile__(I ::: "memory"); \
}
-CK_PR_FENCE(atomic, "sfence")
-CK_PR_FENCE(atomic_store, "sfence")
-CK_PR_FENCE(atomic_load, "mfence")
-CK_PR_FENCE(store_atomic, "sfence")
-CK_PR_FENCE(load_atomic, "mfence")
+/* Atomic operations are always serializing. */
+CK_PR_FENCE(atomic, "")
+CK_PR_FENCE(atomic_store, "")
+CK_PR_FENCE(atomic_load, "")
+CK_PR_FENCE(store_atomic, "")
+CK_PR_FENCE(load_atomic, "")
+
+/* Traditional fence interface. */
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")
+
+/* Below are stdatomic-style fences. */
+
+/*
+ * Provides load-store and store-store ordering. However, Intel specifies that
+ * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in
+ * particular, stores are not re-ordered with respect to prior loads and it is
+ * really just the stores that are subject to re-ordering). However, we take
+ * the conservative route as the manuals are too ambiguous for my taste.
+ */
CK_PR_FENCE(release, "mfence")
+
+/*
+ * Provides load-load and load-store ordering. The lfence instruction ensures
+ * all prior load operations are complete before any subsequent instructions
+ * actually begin execution. However, the manual also ends up going to describe
+ * WC memory as a relaxed model.
+ */
CK_PR_FENCE(acquire, "mfence")
+
CK_PR_FENCE(acqrel, "mfence")
CK_PR_FENCE(lock, "mfence")
CK_PR_FENCE(unlock, "mfence")
diff --git a/include/spinlock/dec.h b/include/spinlock/dec.h
index 11d36dded027..3e36bf7612a4 100644
--- a/include/spinlock/dec.h
+++ b/include/spinlock/dec.h
@@ -111,7 +111,8 @@ ck_spinlock_dec_lock_eb(struct ck_spinlock_dec *lock)
if (r == true)
break;
- ck_backoff_eb(&backoff);
+ while (ck_pr_load_uint(&lock->value) != 1)
+ ck_backoff_eb(&backoff);
}
ck_pr_fence_lock();