aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAttilio Rao <attilio@FreeBSD.org>2009-12-12 21:31:07 +0000
committerAttilio Rao <attilio@FreeBSD.org>2009-12-12 21:31:07 +0000
commit2028867def7031b5b9b732cd2e08e102ba1e2831 (patch)
tree3cab4285c08dfc9b6999901a2864c664eeda6602
parent1173b9a2d0fb82f27b7ba8581045e45de6231747 (diff)
downloadsrc-2028867def7031b5b9b732cd2e08e102ba1e2831.tar.gz
src-2028867def7031b5b9b732cd2e08e102ba1e2831.zip
In current code, threads performing an interruptible sleep (on both
sxlock, via the sx_{s, x}lock_sig() interface, or plain lockmgr), will leave the waiters flag on forcing the owner to do a wakeup even when if the waiter queue is empty. That operation may lead to a deadlock in the case of doing a fake wakeup on the "preferred" (based on the wakeup algorithm) queue while the other queue has real waiters on it, because nobody is going to wakeup the 2nd queue waiters and they will sleep indefinitively. A similar bug, is present, for lockmgr in the case the waiters are sleeping with LK_SLEEPFAIL on. In this case, even if the waiters queue is not empty, the waiters won't progress after being awake but they will just fail, still not taking care of the 2nd queue waiters (as instead the lock owned doing the wakeup would expect). In order to fix this bug in a cheap way (without adding too much locking and complicating too much the semantic) add a sleepqueue interface which does report the actual number of waiters on a specified queue of a waitchannel (sleepq_sleepcnt()) and use it in order to determine if the exclusive waiters (or shared waiters) are actually present on the lockmgr (or sx) before to give them precedence in the wakeup algorithm. This fix alone, however doesn't solve the LK_SLEEPFAIL bug. In order to cope with it, add the tracking of how many exclusive LK_SLEEPFAIL waiters a lockmgr has and if all the waiters on the exclusive waiters queue are LK_SLEEPFAIL just wake both queues. The sleepq_sleepcnt() introduction and ABI breakage require __FreeBSD_version bumping. Reported by: avg, kib, pho Reviewed by: kib Tested by: pho
Notes
Notes: svn path=/head/; revision=200447
-rw-r--r--share/man/man9/sleepqueue.913
-rw-r--r--sys/kern/kern_lock.c105
-rw-r--r--sys/kern/kern_sx.c6
-rw-r--r--sys/kern/subr_sleepqueue.c35
-rw-r--r--sys/sys/_lockmgr.h1
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/sleepqueue.h1
7 files changed, 143 insertions, 20 deletions
diff --git a/share/man/man9/sleepqueue.9 b/share/man/man9/sleepqueue.9
index e2e3a4b41501..bc5acf860720 100644
--- a/share/man/man9/sleepqueue.9
+++ b/share/man/man9/sleepqueue.9
@@ -23,7 +23,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 13, 2007
+.Dd December 11, 2009
.Dt SLEEPQUEUE 9
.Os
.Sh NAME
@@ -41,6 +41,7 @@
.Nm sleepq_remove ,
.Nm sleepq_signal ,
.Nm sleepq_set_timeout ,
+.Nm sleepq_sleepcnt ,
.Nm sleepq_timedwait ,
.Nm sleepq_timedwait_sig ,
.Nm sleepq_wait ,
@@ -77,6 +78,8 @@
.Fn sleepq_signal "void *wchan" "int flags" "int pri" "int queue"
.Ft void
.Fn sleepq_set_timeout "void *wchan" "int timo"
+.Ft u_int
+.Fn sleepq_sleepcnt "void *wchan" "int queue"
.Ft int
.Fn sleepq_timedwait "void *wchan"
.Ft int
@@ -348,6 +351,14 @@ One possible use is waking up a specific thread from a widely shared sleep
channel.
.Pp
The
+.Fn sleepq_sleepcnt
+function offer a simple way to retrieve the number of threads sleeping for
+the specified
+.Fa queue ,
+given a
+.Fa wchan .
+.Pp
+The
.Fn sleepq_abort ,
.Fn sleepq_broadcast ,
and
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 90df2fcb6348..531c851e7f42 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -197,6 +197,8 @@ sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
if (flags & LK_INTERLOCK)
class->lc_unlock(ilk);
+ if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
+ lk->lk_exslpfail++;
GIANT_SAVE();
sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
SLEEPQ_INTERRUPTIBLE : 0), queue);
@@ -225,6 +227,7 @@ static __inline int
wakeupshlk(struct lock *lk, const char *file, int line)
{
uintptr_t v, x;
+ u_int realexslp;
int queue, wakeup_swapper;
TD_LOCKS_DEC(curthread);
@@ -270,13 +273,34 @@ wakeupshlk(struct lock *lk, const char *file, int line)
/*
* If the lock has exclusive waiters, give them preference in
* order to avoid deadlock with shared runners up.
+ * If interruptible sleeps left the exclusive queue empty
+ * avoid a starvation for the threads sleeping on the shared
+ * queue by giving them precedence and cleaning up the
+ * exclusive waiters bit anyway.
*/
- if (x & LK_EXCLUSIVE_WAITERS) {
- queue = SQ_EXCLUSIVE_QUEUE;
- v |= (x & LK_SHARED_WAITERS);
+ realexslp = sleepq_sleepcnt(&lk->lock_object,
+ SQ_EXCLUSIVE_QUEUE);
+ if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
+ if (lk->lk_exslpfail < realexslp) {
+ lk->lk_exslpfail = 0;
+ queue = SQ_EXCLUSIVE_QUEUE;
+ v |= (x & LK_SHARED_WAITERS);
+ } else {
+ lk->lk_exslpfail = 0;
+ LOCK_LOG2(lk,
+ "%s: %p has only LK_SLEEPFAIL sleepers",
+ __func__, lk);
+ LOCK_LOG2(lk,
+ "%s: %p waking up threads on the exclusive queue",
+ __func__, lk);
+ wakeup_swapper =
+ sleepq_broadcast(&lk->lock_object,
+ SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
+ queue = SQ_SHARED_QUEUE;
+ }
+
} else {
- MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
- LK_SHARED_WAITERS);
+ MPASS(lk->lk_exslpfail == 0);
queue = SQ_SHARED_QUEUE;
}
@@ -288,7 +312,7 @@ wakeupshlk(struct lock *lk, const char *file, int line)
LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
__func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
"exclusive");
- wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
+ wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
0, queue);
sleepq_release(&lk->lock_object);
break;
@@ -353,6 +377,7 @@ lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
lk->lk_lock = LK_UNLOCKED;
lk->lk_recurse = 0;
+ lk->lk_exslpfail = 0;
lk->lk_timo = timo;
lk->lk_pri = pri;
lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
@@ -365,6 +390,7 @@ lockdestroy(struct lock *lk)
KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
+ KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
lock_destroy(&lk->lock_object);
}
@@ -376,7 +402,7 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
struct lock_class *class;
const char *iwmesg;
uintptr_t tid, v, x;
- u_int op;
+ u_int op, realexslp;
int error, ipri, itimo, queue, wakeup_swapper;
#ifdef LOCK_PROFILING
uint64_t waittime = 0;
@@ -906,14 +932,34 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
* If the lock has exclusive waiters, give them
* preference in order to avoid deadlock with
* shared runners up.
+ * If interruptible sleeps left the exclusive queue
+ * empty avoid a starvation for the threads sleeping
+ * on the shared queue by giving them precedence
+ * and cleaning up the exclusive waiters bit anyway.
*/
MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
- if (x & LK_EXCLUSIVE_WAITERS) {
- queue = SQ_EXCLUSIVE_QUEUE;
- v |= (x & LK_SHARED_WAITERS);
+ realexslp = sleepq_sleepcnt(&lk->lock_object,
+ SQ_EXCLUSIVE_QUEUE);
+ if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
+ if (lk->lk_exslpfail < realexslp) {
+ lk->lk_exslpfail = 0;
+ queue = SQ_EXCLUSIVE_QUEUE;
+ v |= (x & LK_SHARED_WAITERS);
+ } else {
+ lk->lk_exslpfail = 0;
+ LOCK_LOG2(lk,
+ "%s: %p has only LK_SLEEPFAIL sleepers",
+ __func__, lk);
+ LOCK_LOG2(lk,
+ "%s: %p waking up threads on the exclusive queue",
+ __func__, lk);
+ wakeup_swapper =
+ sleepq_broadcast(&lk->lock_object,
+ SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
+ queue = SQ_SHARED_QUEUE;
+ }
} else {
- MPASS((x & LK_ALL_WAITERS) ==
- LK_SHARED_WAITERS);
+ MPASS(lk->lk_exslpfail == 0);
queue = SQ_SHARED_QUEUE;
}
@@ -922,7 +968,7 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
__func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
"exclusive");
atomic_store_rel_ptr(&lk->lk_lock, v);
- wakeup_swapper = sleepq_broadcast(&lk->lock_object,
+ wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
SLEEPQ_LK, 0, queue);
sleepq_release(&lk->lock_object);
break;
@@ -979,14 +1025,47 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
if ((x & ~v) == LK_UNLOCKED) {
v = (x & ~LK_EXCLUSIVE_SPINNERS);
+
+ /*
+ * If interruptible sleeps left the exclusive
+ * queue empty avoid a starvation for the
+ * threads sleeping on the shared queue by
+ * giving them precedence and cleaning up the
+ * exclusive waiters bit anyway.
+ */
if (v & LK_EXCLUSIVE_WAITERS) {
queue = SQ_EXCLUSIVE_QUEUE;
v &= ~LK_EXCLUSIVE_WAITERS;
} else {
MPASS(v & LK_SHARED_WAITERS);
+ MPASS(lk->lk_exslpfail == 0);
queue = SQ_SHARED_QUEUE;
v &= ~LK_SHARED_WAITERS;
}
+ if (queue == SQ_EXCLUSIVE_QUEUE) {
+ realexslp =
+ sleepq_sleepcnt(&lk->lock_object,
+ SQ_EXCLUSIVE_QUEUE);
+ if (lk->lk_exslpfail >= realexslp) {
+ lk->lk_exslpfail = 0;
+ queue = SQ_SHARED_QUEUE;
+ v &= ~LK_SHARED_WAITERS;
+ if (realexslp != 0) {
+ LOCK_LOG2(lk,
+ "%s: %p has only LK_SLEEPFAIL sleepers",
+ __func__, lk);
+ LOCK_LOG2(lk,
+ "%s: %p waking up threads on the exclusive queue",
+ __func__, lk);
+ wakeup_swapper =
+ sleepq_broadcast(
+ &lk->lock_object,
+ SLEEPQ_LK, 0,
+ SQ_EXCLUSIVE_QUEUE);
+ }
+ } else
+ lk->lk_exslpfail = 0;
+ }
if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
sleepq_release(&lk->lock_object);
continue;
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 2d777a280be5..37e5d032af9b 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -705,8 +705,12 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
* ideal. It gives precedence to shared waiters if they are
* present. For this condition, we have to preserve the
* state of the exclusive waiters flag.
+ * If interruptible sleeps left the shared queue empty avoid a
+ * starvation for the threads sleeping on the exclusive queue by giving
+ * them precedence and cleaning up the shared waiters bit anyway.
*/
- if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
+ if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 &&
+ sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
queue = SQ_SHARED_QUEUE;
x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
} else
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index b3ae6fddc040..a0496bdaad35 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -118,6 +118,7 @@ __FBSDID("$FreeBSD$");
*/
struct sleepqueue {
TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
+ u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
void *sq_wchan; /* (c) Wait channel. */
@@ -306,9 +307,12 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
int i;
sq = td->td_sleepqueue;
- for (i = 0; i < NR_SLEEPQS; i++)
+ for (i = 0; i < NR_SLEEPQS; i++) {
KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
- ("thread's sleep queue %d is not empty", i));
+ ("thread's sleep queue %d is not empty", i));
+ KASSERT(sq->sq_blockedcnt[i] == 0,
+ ("thread's sleep queue %d count mismatches", i));
+ }
KASSERT(LIST_EMPTY(&sq->sq_free),
("thread's sleep queue has a non-empty free list"));
KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
@@ -334,6 +338,7 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
}
thread_lock(td);
TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
+ sq->sq_blockedcnt[queue]++;
td->td_sleepqueue = NULL;
td->td_sqqueue = queue;
td->td_wchan = wchan;
@@ -367,6 +372,22 @@ sleepq_set_timeout(void *wchan, int timo)
}
/*
+ * Return the number of actual sleepers for the specified queue.
+ */
+u_int
+sleepq_sleepcnt(void *wchan, int queue)
+{
+ struct sleepqueue *sq;
+
+ KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+ MPASS((queue >= 0) && (queue < NR_SLEEPQS));
+ sq = sleepq_lookup(wchan);
+ if (sq == NULL)
+ return (0);
+ return (sq->sq_blockedcnt[queue]);
+}
+
+/*
* Marks the pending sleep of the current thread as interruptible and
* makes an initial check for pending signals before putting a thread
* to sleep. Enters and exits with the thread lock held. Thread lock
@@ -665,6 +686,7 @@ sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
mtx_assert(&sc->sc_lock, MA_OWNED);
/* Remove the thread from the queue. */
+ sq->sq_blockedcnt[td->td_sqqueue]--;
TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
/*
@@ -720,8 +742,10 @@ sleepq_dtor(void *mem, int size, void *arg)
int i;
sq = mem;
- for (i = 0; i < NR_SLEEPQS; i++)
+ for (i = 0; i < NR_SLEEPQS; i++) {
MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
+ MPASS(sq->sq_blockedcnt[i] == 0);
+ }
}
#endif
@@ -736,8 +760,10 @@ sleepq_init(void *mem, int size, int flags)
bzero(mem, size);
sq = mem;
- for (i = 0; i < NR_SLEEPQS; i++)
+ for (i = 0; i < NR_SLEEPQS; i++) {
TAILQ_INIT(&sq->sq_blocked[i]);
+ sq->sq_blockedcnt[i] = 0;
+ }
LIST_INIT(&sq->sq_free);
return (0);
}
@@ -1170,6 +1196,7 @@ found:
td->td_tid, td->td_proc->p_pid,
td->td_name);
}
+ db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
}
}
diff --git a/sys/sys/_lockmgr.h b/sys/sys/_lockmgr.h
index 0b99e1a4cba5..0367ff1e7ba1 100644
--- a/sys/sys/_lockmgr.h
+++ b/sys/sys/_lockmgr.h
@@ -38,6 +38,7 @@
struct lock {
struct lock_object lock_object;
volatile uintptr_t lk_lock;
+ u_int lk_exslpfail;
int lk_timo;
int lk_pri;
#ifdef DEBUG_LOCKS
diff --git a/sys/sys/param.h b/sys/sys/param.h
index d639a06f3fa7..79932bebd19a 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 900004 /* Master, propagated to newvers */
+#define __FreeBSD_version 900005 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>
diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h
index 362945aabeb4..224d602c01f4 100644
--- a/sys/sys/sleepqueue.h
+++ b/sys/sys/sleepqueue.h
@@ -109,6 +109,7 @@ void sleepq_release(void *wchan);
void sleepq_remove(struct thread *td, void *wchan);
int sleepq_signal(void *wchan, int flags, int pri, int queue);
void sleepq_set_timeout(void *wchan, int timo);
+u_int sleepq_sleepcnt(void *wchan, int queue);
int sleepq_timedwait(void *wchan, int pri);
int sleepq_timedwait_sig(void *wchan, int pri);
void sleepq_wait(void *wchan, int pri);