author		Mateusz Guzik <mjg@FreeBSD.org>	2016-08-01 21:48:37 +0000
committer	Mateusz Guzik <mjg@FreeBSD.org>	2016-08-01 21:48:37 +0000
commit		1ada904147c50b3aad694c25fd46409324e79f36 (patch)
tree		b028139badd977a970d22c6915536a75e4786fcf	/sys/kern/kern_sx.c
parent		e72a746a6edb54194f8c97ff967c6ea489e26dc8 (diff)
Implement trivial backoff for locking primitives.
All current spinning loops retry an atomic op the first chance they get, which leads to performance degradation under load.

One classic solution to the problem consists of delaying the test to an extent. This implementation has a trivial linear increment and a random factor for each attempt.

For simplicity, this first-touch implementation only modifies spinning loops where the lock owner is running. Spin mutexes and thread lock were not modified.

Current parameters are autotuned on boot based on mp_ncpus. Autotune factors are very conservative and are subject to change later.

Reviewed by:	kib, jhb
Tested by:	pho
MFC after:	1 week
Notes:
	svn path=/head/; revision=303643
Diffstat (limited to 'sys/kern/kern_sx.c')
-rw-r--r--	sys/kern/kern_sx.c	69
1 file changed, 50 insertions, 19 deletions
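For reference, the lock_delay() machinery that the hunks below start using is added to kern/subr_lock.c and sys/sys/lock.h by this same commit. A minimal sketch of the backoff loop as the commit message describes it (a trivial linear increment plus a random factor per attempt) could look like the following; the entropy source (cpu_ticks()) and the exact clamping order are assumptions here, not the verbatim committed code:

static void
lock_delay(struct lock_delay_arg *la)
{
	struct lock_delay_config *lc = la->config;
	u_int backoff, i;

	if (la->delay == 0)
		la->delay = lc->initial;	/* first attempt: start at the base */
	else {
		la->delay += lc->step;		/* trivial linear increment */
		if (la->delay > lc->max)
			la->delay = lc->max;	/* clamp at the autotuned ceiling */
	}

	/* Random factor: spin somewhere between min and the current delay. */
	backoff = cpu_ticks() % la->delay;
	if (backoff < lc->min)
		backoff = lc->min;

	for (i = 0; i < backoff; i++)
		cpu_spinwait();
	la->spin_cnt += backoff;		/* feeds the lockstat spin accounting */
}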
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 78d207d7d80e..e43a28615721 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
@@ -145,6 +146,33 @@ static u_int asx_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
+
+static struct lock_delay_config sx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
+ 0, "");
+
+static void
+sx_delay_sysinit(void *dummy)
+{
+
+ sx_delay.initial = mp_ncpus * 25;
+ sx_delay.step = (mp_ncpus * 25) / 2;
+ sx_delay.min = mp_ncpus * 5;
+ sx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(sx_delay_sysinit);
#endif
void
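As a worked example of the autotuning above: with mp_ncpus == 16 the sysinit replaces the conservative static defaults with initial = 400, step = 200, min = 80 and max = 4000 spinwait iterations. All four values remain writable at runtime through the debug.sx.delay_* sysctls declared above.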
@@ -513,9 +541,11 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
int contested = 0;
#endif
int error = 0;
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
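struct lock_delay_arg and lock_delay_arg_init(), first used in this hunk, come from sys/sys/lock.h as extended by this commit. Judging from how the diff uses them (lda.spin_cnt, lock_delay_arg_init(&lda, &sx_delay)), their shape is roughly the following sketch, not the verbatim header:

struct lock_delay_arg {
	struct lock_delay_config *config;	/* tunables; &sx_delay for sx locks */
	u_int delay;				/* current backoff window */
	u_int spin_cnt;				/* iterations spent spinning */
};

static __inline void
lock_delay_arg_init(struct lock_delay_arg *la, struct lock_delay_config *lc)
{
	la->config = lc;
	la->delay = 0;
	la->spin_cnt = 0;
}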
@@ -524,6 +554,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &sx_delay);
+#endif
+
/* If we already hold an exclusive lock, then recurse. */
if (sx_xlocked(sx)) {
KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -549,7 +583,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -578,12 +612,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -605,7 +635,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
break;
cpu_spinwait();
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
}
KTR_STATE0(KTR_SCHED, "thread",
@@ -725,7 +755,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
@@ -818,9 +848,11 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
#endif
uintptr_t x;
int error = 0;
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -829,6 +861,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &sx_delay);
+#endif
#ifdef KDTRACE_HOOKS
state = sx->sx_lock;
all_time -= lockstat_nsecs(&sx->lock_object);
@@ -840,7 +875,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
*/
for (;;) {
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
x = sx->sx_lock;
@@ -888,12 +923,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
"lockname:\"%s\"", sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -989,7 +1020,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
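A usage note: the debug.sx.delay_* knobs can be inspected and adjusted live with sysctl(8), and the sx__spin lockstat probe recorded above (taken whenever spinning outweighed sleeping) is the natural metric for judging whether the backoff helps on a given machine.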