diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/compat/cloudabi/cloudabi_thread.c | 3 | ||||
-rw-r--r-- | sys/compat/linux/linux_fork.c | 3 | ||||
-rw-r--r-- | sys/kern/kern_exit.c | 4 | ||||
-rw-r--r-- | sys/kern/kern_thr.c | 3 | ||||
-rw-r--r-- | sys/kern/kern_thread.c | 1 | ||||
-rw-r--r-- | sys/kern/kern_umtx.c | 632 | ||||
-rw-r--r-- | sys/sys/_umtx.h | 6 | ||||
-rw-r--r-- | sys/sys/proc.h | 3 | ||||
-rw-r--r-- | sys/sys/umtx.h | 28 | ||||
-rw-r--r-- | sys/vm/vm_object.c | 2 | ||||
-rw-r--r-- | sys/vm/vm_object.h | 1 | ||||
-rw-r--r-- | sys/vm/vnode_pager.c | 1 |
12 files changed, 522 insertions, 165 deletions
diff --git a/sys/compat/cloudabi/cloudabi_thread.c b/sys/compat/cloudabi/cloudabi_thread.c index 37dc79438e35..dd54e8923d36 100644 --- a/sys/compat/cloudabi/cloudabi_thread.c +++ b/sys/compat/cloudabi/cloudabi_thread.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/sched.h> #include <sys/syscallsubr.h> +#include <sys/umtx.h> #include <contrib/cloudabi/cloudabi_types_common.h> @@ -44,6 +45,8 @@ cloudabi_sys_thread_exit(struct thread *td, .scope = uap->scope, }; + umtx_thread_exit(td); + /* Wake up joining thread. */ cloudabi_sys_lock_unlock(td, &cloudabi_sys_lock_unlock_args); diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index de1d041c19ec..4c30c9ab6e4b 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sched.h> #include <sys/syscallsubr.h> #include <sys/sx.h> +#include <sys/umtx.h> #include <sys/unistd.h> #include <sys/wait.h> @@ -410,6 +411,8 @@ linux_exit(struct thread *td, struct linux_exit_args *args) LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval); + umtx_thread_exit(td); + linux_thread_detach(td); /* diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 1cd9b62fbf78..0a0659d017ce 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -350,8 +350,11 @@ exit1(struct thread *td, int rval, int signo) KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } + PROC_UNLOCK(p); + umtx_thread_exit(td); + /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. @@ -595,7 +598,6 @@ exit1(struct thread *td, int rval, int signo) wakeup(p->p_pptr); cv_broadcast(&p->p_pwait); sched_exit(p->p_pptr, td); - umtx_thread_exit(td); PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 1b226dd13cf3..75bd83d5ca2f 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -308,6 +308,8 @@ sys_thr_exit(struct thread *td, struct thr_exit_args *uap) /* long *state */ { + umtx_thread_exit(td); + /* Signal userland that it can free the stack. */ if ((void *)uap->state != NULL) { suword_lwpid(uap->state, 1); @@ -367,7 +369,6 @@ kern_thr_exit(struct thread *td) KASSERT(p->p_numthreads > 1, ("too few threads")); racct_sub(p, RACCT_NTHR, 1); tdsigcleanup(td); - umtx_thread_exit(td); PROC_SLOCK(p); thread_stopped(p); thread_exit(); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 66b6120e194f..21f358778a75 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -950,6 +950,7 @@ thread_suspend_check(int return_instead) */ if (__predict_false(p->p_sysent->sv_thread_detach != NULL)) (p->p_sysent->sv_thread_detach)(td); + umtx_thread_exit(td); kern_thr_exit(td); panic("stopped thread did not exit"); } diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 7ceb2cc0802b..8471591e648d 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 The FreeBSD Foundation + * Copyright (c) 2015, 2016 The FreeBSD Foundation * Copyright (c) 2004, David Xu <davidxu@freebsd.org> * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> * All rights reserved. @@ -212,6 +212,30 @@ struct abs_timeout { struct timespec end; }; +#ifdef COMPAT_FREEBSD32 +struct umutex32 { + volatile __lwpid_t m_owner; /* Owner of the mutex */ + __uint32_t m_flags; /* Flags of the mutex */ + __uint32_t m_ceilings[2]; /* Priority protect ceiling */ + __uint32_t m_rb_lnk; /* Robust linkage */ + __uint32_t m_pad; + __uint32_t m_spare[2]; +}; + +_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); +_Static_assert(__offsetof(struct umutex, m_spare[0]) == + __offsetof(struct umutex32, m_spare[0]), "m_spare32"); +#endif + +int umtx_shm_vnobj_persistent = 0; +SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, + &umtx_shm_vnobj_persistent, 0, + "False forces destruction of umtx attached to file, on last close"); +static int umtx_max_rb = 1000; +SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, + &umtx_max_rb, 0, + ""); + static uma_zone_t umtx_pi_zone; static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); @@ -220,6 +244,10 @@ static int umtx_pi_allocated; static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, &umtx_pi_allocated, 0, "Allocated umtx_pi"); +static int umtx_verbose_rb = 1; +SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, + &umtx_verbose_rb, 0, + ""); #ifdef UMTX_PROFILING static long max_length; @@ -241,10 +269,11 @@ static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout static int umtxq_count(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); -static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); +static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, + bool rb); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, - struct image_params *imgp __unused); + struct image_params *imgp __unused); SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) @@ -423,7 +452,8 @@ umtxq_alloc(void) struct umtx_q *uq; uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); - uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); + uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, + M_WAITOK | M_ZERO); TAILQ_INIT(&uq->uq_spare_queue->head); TAILQ_INIT(&uq->uq_pi_contested); uq->uq_inherited_pri = PRI_MAX; @@ -433,6 +463,7 @@ umtxq_alloc(void) void umtxq_free(struct umtx_q *uq) { + MPASS(uq->uq_spare_queue != NULL); free(uq->uq_spare_queue, M_UMTX); free(uq, M_UMTX); @@ -441,13 +472,16 @@ umtxq_free(struct umtx_q *uq) static inline void umtxq_hash(struct umtx_key *key) { - unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; + unsigned n; + + n = (uintptr_t)key->info.both.a + key->info.both.b; key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; } static inline struct umtxq_chain * umtxq_getchain(struct umtx_key *key) { + if (key->type <= TYPE_SEM) return (&umtxq_chains[1][key->hash]); return (&umtxq_chains[0][key->hash]); @@ -750,13 +784,13 @@ abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) { abs_timeout_init(timo, umtxtime->_clockid, - (umtxtime->_flags & UMTX_ABSTIME) != 0, - &umtxtime->_timeout); + (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); } static inline void abs_timeout_update(struct abs_timeout *timo) { + kern_clock_gettime(curthread, timo->clockid, &timo->cur); } @@ -772,6 +806,19 @@ abs_timeout_gethz(struct abs_timeout *timo) return (tstohz(&tts)); } +static uint32_t +umtx_unlock_val(uint32_t flags, bool rb) +{ + + if (rb) + return (UMUTEX_RB_OWNERDEAD); + else if ((flags & UMUTEX_NONCONSISTENT) != 0) + return (UMUTEX_RB_NOTRECOV); + else + return (UMUTEX_UNOWNED); + +} + /* * Put thread into sleep state, before sleeping, check if * thread was removed from umtx queue. @@ -866,7 +913,7 @@ umtx_key_release(struct umtx_key *key) */ static int do_wait(struct thread *td, void *addr, u_long id, - struct _umtx_time *timeout, int compat32, int is_private) + struct _umtx_time *timeout, int compat32, int is_private) { struct abs_timeout timo; struct umtx_q *uq; @@ -925,7 +972,7 @@ kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) int ret; if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, - is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) + is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) return (ret); umtxq_lock(&key); umtxq_signal(&key, n_wake); @@ -939,7 +986,7 @@ kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) */ static int do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int mode) + struct _umtx_time *timeout, int mode) { struct abs_timeout timo; struct umtx_q *uq; @@ -961,11 +1008,38 @@ do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, if (rv == -1) return (EFAULT); if (mode == _UMUTEX_WAIT) { - if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) + if (owner == UMUTEX_UNOWNED || + owner == UMUTEX_CONTESTED || + owner == UMUTEX_RB_OWNERDEAD || + owner == UMUTEX_RB_NOTRECOV) return (0); } else { /* - * Try the uncontested case. This should be done in userland. + * Robust mutex terminated. Kernel duty is to + * return EOWNERDEAD to the userspace. The + * umutex.m_flags UMUTEX_NONCONSISTENT is set + * by the common userspace code. + */ + if (owner == UMUTEX_RB_OWNERDEAD) { + rv = casueword32(&m->m_owner, + UMUTEX_RB_OWNERDEAD, &owner, + id | UMUTEX_CONTESTED); + if (rv == -1) + return (EFAULT); + if (owner == UMUTEX_RB_OWNERDEAD) + return (EOWNERDEAD); /* success */ + rv = umtxq_check_susp(td); + if (rv != 0) + return (rv); + continue; + } + if (owner == UMUTEX_RB_NOTRECOV) + return (ENOTRECOVERABLE); + + + /* + * Try the uncontested case. This should be + * done in userland. */ rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); @@ -977,7 +1051,10 @@ do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, if (owner == UMUTEX_UNOWNED) return (0); - /* If no one owns it but it is contested try to acquire it. */ + /* + * If no one owns it but it is contested try + * to acquire it. + */ if (owner == UMUTEX_CONTESTED) { rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, @@ -993,7 +1070,10 @@ do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, if (rv != 0) return (rv); - /* If this failed the lock has changed, restart. */ + /* + * If this failed the lock has + * changed, restart. + */ continue; } } @@ -1061,12 +1141,11 @@ do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int -do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) { struct umtx_key key; - uint32_t owner, old, id; - int error; - int count; + uint32_t owner, old, id, newlock; + int error, count; id = td->td_tid; /* @@ -1079,8 +1158,9 @@ do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); + newlock = umtx_unlock_val(flags, rb); if ((owner & UMUTEX_CONTESTED) == 0) { - error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED); + error = casueword32(&m->m_owner, owner, &old, newlock); if (error == -1) return (EFAULT); if (old == owner) @@ -1103,10 +1183,11 @@ do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ - error = casueword32(&m->m_owner, owner, &old, - count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); + if (count > 1) + newlock |= UMUTEX_CONTESTED; + error = casueword32(&m->m_owner, owner, &old, newlock); umtxq_lock(&key); - umtxq_signal(&key,1); + umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); @@ -1134,7 +1215,8 @@ do_wake_umutex(struct thread *td, struct umutex *m) if (error == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != 0) + if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && + owner != UMUTEX_RB_NOTRECOV) return (0); error = fueword32(&m->m_flags, &flags); @@ -1151,7 +1233,8 @@ do_wake_umutex(struct thread *td, struct umutex *m) count = umtxq_count(&key); umtxq_unlock(&key); - if (count <= 1) { + if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && + owner != UMUTEX_RB_NOTRECOV) { error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, UMUTEX_UNOWNED); if (error == -1) @@ -1159,7 +1242,8 @@ do_wake_umutex(struct thread *td, struct umutex *m) } umtxq_lock(&key); - if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) + if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || + owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); @@ -1179,21 +1263,28 @@ do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) int error; int count; - switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { + switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | + UMUTEX_ROBUST)) { case 0: + case UMUTEX_ROBUST: type = TYPE_NORMAL_UMUTEX; break; case UMUTEX_PRIO_INHERIT: type = TYPE_PI_UMUTEX; break; + case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): + type = TYPE_PI_ROBUST_UMUTEX; + break; case UMUTEX_PRIO_PROTECT: type = TYPE_PP_UMUTEX; break; + case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): + type = TYPE_PP_ROBUST_UMUTEX; + break; default: return (EINVAL); } - if ((error = umtx_key_get(m, type, GET_SHARE(flags), - &key)) != 0) + if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) return (error); owner = 0; @@ -1229,7 +1320,7 @@ do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) if (error == -1) error = EFAULT; while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && - (owner & UMUTEX_CONTESTED) == 0) { + (owner & UMUTEX_CONTESTED) == 0) { error = casueword32(&m->m_owner, owner, &old, owner | UMUTEX_CONTESTED); if (error == -1) { @@ -1247,7 +1338,8 @@ do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) umtxq_lock(&key); if (error == EFAULT) { umtxq_signal(&key, INT_MAX); - } else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) + } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || + owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); @@ -1481,6 +1573,7 @@ static int umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq; + int pri; mtx_lock(&umtx_lock); if (pi->pi_owner == owner) { @@ -1498,8 +1591,6 @@ umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) umtx_pi_setowner(pi, owner); uq = TAILQ_FIRST(&pi->pi_blocked); if (uq != NULL) { - int pri; - pri = UPRI(uq->uq_thread); thread_lock(owner); if (pri < UPRI(owner)) @@ -1537,15 +1628,15 @@ umtx_pi_adjust(struct thread *td, u_char oldpri) * Sleep on a PI mutex. */ static int -umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, - uint32_t owner, const char *wmesg, struct abs_timeout *timo) +umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, + const char *wmesg, struct abs_timeout *timo, bool shared) { struct umtxq_chain *uc; struct thread *td, *td1; struct umtx_q *uq1; - int pri; - int error = 0; + int error, pri; + error = 0; td = uq->uq_thread; KASSERT(td == curthread, ("inconsistent uq_thread")); uc = umtxq_getchain(&uq->uq_key); @@ -1555,8 +1646,7 @@ umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, mtx_lock(&umtx_lock); if (pi->pi_owner == NULL) { mtx_unlock(&umtx_lock); - /* XXX Only look up thread in current process. */ - td1 = tdfind(owner, curproc->p_pid); + td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); mtx_lock(&umtx_lock); if (td1 != NULL) { if (pi->pi_owner == NULL) @@ -1680,13 +1770,14 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, struct abs_timeout timo; struct umtx_q *uq; struct umtx_pi *pi, *new_pi; - uint32_t id, owner, old; + uint32_t id, old_owner, owner, old; int error, rv; id = td->td_tid; uq = td->td_umtxq; - if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), + if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? + TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); @@ -1737,17 +1828,23 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, break; } + if (owner == UMUTEX_RB_NOTRECOV) { + error = ENOTRECOVERABLE; + break; + } + /* If no one owns it but it is contested try to acquire it. */ - if (owner == UMUTEX_CONTESTED) { - rv = casueword32(&m->m_owner, - UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); + if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { + old_owner = owner; + rv = casueword32(&m->m_owner, owner, &owner, + id | UMUTEX_CONTESTED); /* The address was invalid. */ if (rv == -1) { error = EFAULT; break; } - if (owner == UMUTEX_CONTESTED) { + if (owner == old_owner) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); error = umtx_pi_claim(pi, td); @@ -1762,8 +1859,11 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, */ (void)casuword32(&m->m_owner, id | UMUTEX_CONTESTED, - UMUTEX_CONTESTED); + old_owner); } + if (error == 0 && + old_owner == UMUTEX_RB_OWNERDEAD) + error = EOWNERDEAD; break; } @@ -1802,8 +1902,8 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, * either some one else has acquired the lock or it has been * released. */ - rv = casueword32(&m->m_owner, owner, &old, - owner | UMUTEX_CONTESTED); + rv = casueword32(&m->m_owner, owner, &old, owner | + UMUTEX_CONTESTED); /* The address was invalid. */ if (rv == -1) { @@ -1816,11 +1916,14 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread - * unlocking the umtx. + * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD + * value for owner is impossible there. */ if (old == owner) { - error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, - "umtxpi", timeout == NULL ? NULL : &timo); + error = umtxq_sleep_pi(uq, pi, + owner & ~UMUTEX_CONTESTED, + "umtxpi", timeout == NULL ? NULL : &timo, + (flags & USYNC_PROCESS_SHARED) != 0); if (error != 0) continue; } else { @@ -1845,15 +1948,13 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, * Unlock a PI mutex. */ static int -do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; - uint32_t owner, old, id; - int error; - int count; - int pri; + uint32_t id, new_owner, old, owner; + int count, error, pri; id = td->td_tid; /* @@ -1866,9 +1967,11 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); + new_owner = umtx_unlock_val(flags, rb); + /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { - error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED); + error = casueword32(&m->m_owner, owner, &old, new_owner); if (error == -1) return (EFAULT); if (old == owner) @@ -1877,7 +1980,8 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) } /* We should only ever be in here for contested locks */ - if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), + if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? + TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); @@ -1888,7 +1992,7 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) mtx_lock(&umtx_lock); pi = uq_first->uq_pi_blocked; KASSERT(pi != NULL, ("pi == NULL?")); - if (pi->pi_owner != td) { + if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { mtx_unlock(&umtx_lock); umtxq_unbusy(&key); umtxq_unlock(&key); @@ -1897,11 +2001,12 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) return (EPERM); } uq_me = td->td_umtxq; - umtx_pi_disown(pi); + if (pi->pi_owner == td) + umtx_pi_disown(pi); /* get highest priority thread which is still sleeping. */ uq_first = TAILQ_FIRST(&pi->pi_blocked); while (uq_first != NULL && - (uq_first->uq_flags & UQF_UMTXQ) == 0) { + (uq_first->uq_flags & UQF_UMTXQ) == 0) { uq_first = TAILQ_NEXT(uq_first, uq_lockq); } pri = PRI_MAX; @@ -1945,8 +2050,10 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ - error = casueword32(&m->m_owner, owner, &old, - count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); + + if (count > 1) + new_owner |= UMUTEX_CONTESTED; + error = casueword32(&m->m_owner, owner, &old, new_owner); umtxq_unbusy_unlocked(&key); umtx_key_release(&key); @@ -1973,7 +2080,8 @@ do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, id = td->td_tid; uq = td->td_umtxq; - if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), + if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? + TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); @@ -2013,8 +2121,8 @@ do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, } mtx_unlock(&umtx_lock); - rv = casueword32(&m->m_owner, - UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); + rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, + id | UMUTEX_CONTESTED); /* The address was invalid. */ if (rv == -1) { error = EFAULT; @@ -2024,6 +2132,21 @@ do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, if (owner == UMUTEX_CONTESTED) { error = 0; break; + } else if (owner == UMUTEX_RB_OWNERDEAD) { + rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, + &owner, id | UMUTEX_CONTESTED); + if (rv == -1) { + error = EFAULT; + break; + } + if (owner == UMUTEX_RB_OWNERDEAD) { + error = EOWNERDEAD; /* success */ + break; + } + error = 0; + } else if (owner == UMUTEX_RB_NOTRECOV) { + error = ENOTRECOVERABLE; + break; } if (try != 0) { @@ -2064,7 +2187,7 @@ do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, mtx_unlock(&umtx_lock); } - if (error != 0) { + if (error != 0 && error != EOWNERDEAD) { mtx_lock(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; @@ -2093,13 +2216,12 @@ out: * Unlock a PP mutex. */ static int -do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; - uint32_t owner, id; - uint32_t rceiling; + uint32_t id, owner, rceiling; int error, pri, new_inherited_pri, su; id = td->td_tid; @@ -2129,7 +2251,8 @@ do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) new_inherited_pri = PRI_MIN_REALTIME + rceiling; } - if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), + if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? + TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); @@ -2141,7 +2264,8 @@ do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) * to lock the mutex, it is necessary because thread priority * has to be adjusted for such mutex. */ - error = suword32(&m->m_owner, UMUTEX_CONTESTED); + error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | + UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) @@ -2176,13 +2300,11 @@ do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) static int do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, - uint32_t *old_ceiling) + uint32_t *old_ceiling) { struct umtx_q *uq; - uint32_t save_ceiling; - uint32_t owner, id; - uint32_t flags; - int error, rv; + uint32_t flags, id, owner, save_ceiling; + int error, rv, rv1; error = fueword32(&m->m_flags, &flags); if (error == -1) @@ -2193,8 +2315,9 @@ do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, return (EINVAL); id = td->td_tid; uq = td->td_umtxq; - if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), - &uq->uq_key)) != 0) + if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? + TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), + &uq->uq_key)) != 0) return (error); for (;;) { umtxq_lock(&uq->uq_key); @@ -2207,23 +2330,31 @@ do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, break; } - rv = casueword32(&m->m_owner, - UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); + rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, + id | UMUTEX_CONTESTED); if (rv == -1) { error = EFAULT; break; } if (owner == UMUTEX_CONTESTED) { - suword32(&m->m_ceilings[0], ceiling); - suword32(&m->m_owner, UMUTEX_CONTESTED); - error = 0; + rv = suword32(&m->m_ceilings[0], ceiling); + rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); + error = (rv == 0 && rv1 == 0) ? 0: EFAULT; break; } if ((owner & ~UMUTEX_CONTESTED) == id) { - suword32(&m->m_ceilings[0], ceiling); - error = 0; + rv = suword32(&m->m_ceilings[0], ceiling); + error = rv == 0 ? 0 : EFAULT; + break; + } + + if (owner == UMUTEX_RB_OWNERDEAD) { + error = EOWNERDEAD; + break; + } else if (owner == UMUTEX_RB_NOTRECOV) { + error = ENOTRECOVERABLE; break; } @@ -2252,8 +2383,10 @@ do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); - if (error == 0 && old_ceiling != NULL) - suword32(old_ceiling, save_ceiling); + if (error == 0 && old_ceiling != NULL) { + rv = suword32(old_ceiling, save_ceiling); + error = rv == 0 ? 0 : EFAULT; + } return (error); } @@ -2271,7 +2404,7 @@ do_lock_umutex(struct thread *td, struct umutex *m, if (error == -1) return (EFAULT); - switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { + switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: error = do_lock_normal(td, m, flags, timeout, mode); break; @@ -2299,7 +2432,7 @@ do_lock_umutex(struct thread *td, struct umutex *m, * Unlock a userland POSIX mutex. */ static int -do_unlock_umutex(struct thread *td, struct umutex *m) +do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) { uint32_t flags; int error; @@ -2308,13 +2441,13 @@ do_unlock_umutex(struct thread *td, struct umutex *m) if (error == -1) return (EFAULT); - switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { + switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: - return (do_unlock_normal(td, m, flags)); + return (do_unlock_normal(td, m, flags, rb)); case UMUTEX_PRIO_INHERIT: - return (do_unlock_pi(td, m, flags)); + return (do_unlock_pi(td, m, flags, rb)); case UMUTEX_PRIO_PROTECT: - return (do_unlock_pp(td, m, flags)); + return (do_unlock_pp(td, m, flags, rb)); } return (EINVAL); @@ -2322,7 +2455,7 @@ do_unlock_umutex(struct thread *td, struct umutex *m) static int do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, - struct timespec *timeout, u_long wflags) + struct timespec *timeout, u_long wflags) { struct abs_timeout timo; struct umtx_q *uq; @@ -2368,11 +2501,11 @@ do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, umtxq_unbusy_unlocked(&uq->uq_key); - error = do_unlock_umutex(td, m); + error = do_unlock_umutex(td, m, false); if (timeout != NULL) - abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), - timeout); + abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, + timeout); umtxq_lock(&uq->uq_key); if (error == 0) { @@ -3181,7 +3314,7 @@ __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); + return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); } static int @@ -3199,7 +3332,7 @@ __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); + return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); } static int @@ -3217,12 +3350,13 @@ __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); + return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); } static int __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) { + return (kern_umtx_wake(td, uap->obj, uap->val, 0)); } @@ -3230,24 +3364,20 @@ __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) static int __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) { - int count = uap->val; - void *uaddrs[BATCH_SIZE]; - char **upp = (char **)uap->obj; - int tocopy; - int error = 0; - int i, pos = 0; + char *uaddrs[BATCH_SIZE], **upp; + int count, error, i, pos, tocopy; - while (count > 0) { - tocopy = count; - if (tocopy > BATCH_SIZE) - tocopy = BATCH_SIZE; - error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); + upp = (char **)uap->obj; + error = 0; + for (count = uap->val, pos = 0; count > 0; count -= tocopy, + pos += tocopy) { + tocopy = MIN(count, BATCH_SIZE); + error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); if (error != 0) break; for (i = 0; i < tocopy; ++i) kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); - count -= tocopy; - pos += tocopy; + maybe_yield(); } return (error); } @@ -3255,6 +3385,7 @@ __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) static int __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) { + return (kern_umtx_wake(td, uap->obj, uap->val, 1)); } @@ -3274,13 +3405,14 @@ __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, 0); + return (do_lock_umutex(td, uap->obj, tm_p, 0)); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); + + return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); } static int @@ -3299,25 +3431,28 @@ __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); + return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); } static int __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_wake_umutex(td, uap->obj); + + return (do_wake_umutex(td, uap->obj)); } static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_unlock_umutex(td, uap->obj); + + return (do_unlock_umutex(td, uap->obj, false)); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { - return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); + + return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); } static int @@ -3341,13 +3476,15 @@ __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) static int __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) { - return do_cv_signal(td, uap->obj); + + return (do_cv_signal(td, uap->obj)); } static int __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) { - return do_cv_broadcast(td, uap->obj); + + return (do_cv_broadcast(td, uap->obj)); } static int @@ -3392,7 +3529,8 @@ __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) static int __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) { - return do_rw_unlock(td, uap->obj); + + return (do_rw_unlock(td, uap->obj)); } #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) @@ -3760,6 +3898,31 @@ __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) return (umtx_shm(td, uap->uaddr1, uap->val)); } +static int +umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) +{ + + td->td_rb_list = rbp->robust_list_offset; + td->td_rbp_list = rbp->robust_priv_list_offset; + td->td_rb_inact = rbp->robust_inact_offset; + return (0); +} + +static int +__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) +{ + struct umtx_robust_lists_params rb; + int error; + + if (uap->val > sizeof(rb)) + return (EINVAL); + bzero(&rb, sizeof(rb)); + error = copyin(uap->uaddr1, &rb, uap->val); + if (error != 0) + return (error); + return (umtx_robust_lists(td, &rb)); +} + typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); static const _umtx_op_func op_table[] = { @@ -3794,6 +3957,7 @@ static const _umtx_op_func op_table[] = { [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, [UMTX_OP_SHM] = __umtx_op_shm, + [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, }; int @@ -3877,7 +4041,7 @@ __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); + return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); } static int @@ -3896,7 +4060,7 @@ __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, 0); + return (do_lock_umutex(td, uap->obj, tm_p, 0)); } static int @@ -3915,7 +4079,7 @@ __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); + return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); } static int @@ -3989,7 +4153,7 @@ __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *ua return (error); tm_p = &timeout; } - return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); + return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); } #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) @@ -4035,34 +4199,56 @@ __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) static int __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) { - int count = uap->val; - uint32_t uaddrs[BATCH_SIZE]; - uint32_t **upp = (uint32_t **)uap->obj; - int tocopy; - int error = 0; - int i, pos = 0; + uint32_t uaddrs[BATCH_SIZE], **upp; + int count, error, i, pos, tocopy; - while (count > 0) { - tocopy = count; - if (tocopy > BATCH_SIZE) - tocopy = BATCH_SIZE; - error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); + upp = (uint32_t **)uap->obj; + error = 0; + for (count = uap->val, pos = 0; count > 0; count -= tocopy, + pos += tocopy) { + tocopy = MIN(count, BATCH_SIZE); + error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); if (error != 0) break; for (i = 0; i < tocopy; ++i) kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], - INT_MAX, 1); - count -= tocopy; - pos += tocopy; + INT_MAX, 1); + maybe_yield(); } return (error); } +struct umtx_robust_lists_params_compat32 { + uint32_t robust_list_offset; + uint32_t robust_priv_list_offset; + uint32_t robust_inact_offset; +}; + +static int +__umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) +{ + struct umtx_robust_lists_params rb; + struct umtx_robust_lists_params_compat32 rb32; + int error; + + if (uap->val > sizeof(rb32)) + return (EINVAL); + bzero(&rb, sizeof(rb)); + bzero(&rb32, sizeof(rb32)); + error = copyin(uap->uaddr1, &rb32, uap->val); + if (error != 0) + return (error); + rb.robust_list_offset = rb32.robust_list_offset; + rb.robust_priv_list_offset = rb32.robust_priv_list_offset; + rb.robust_inact_offset = rb32.robust_inact_offset; + return (umtx_robust_lists(td, &rb)); +} + static const _umtx_op_func op_table_compat32[] = { [UMTX_OP_RESERVED0] = __umtx_op_unimpl, [UMTX_OP_RESERVED1] = __umtx_op_unimpl, - [UMTX_OP_WAIT] = __umtx_op_wait_compat32, - [UMTX_OP_WAKE] = __umtx_op_wake, + [UMTX_OP_WAIT] = __umtx_op_wait_compat32, + [UMTX_OP_WAKE] = __umtx_op_wake, [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, @@ -4090,6 +4276,7 @@ static const _umtx_op_func op_table_compat32[] = { [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, [UMTX_OP_SHM] = __umtx_op_shm, + [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, }; int @@ -4107,6 +4294,7 @@ freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) void umtx_thread_init(struct thread *td) { + td->td_umtxq = umtxq_alloc(); td->td_umtxq->uq_thread = td; } @@ -4114,6 +4302,7 @@ umtx_thread_init(struct thread *td) void umtx_thread_fini(struct thread *td) { + umtxq_free(td->td_umtxq); } @@ -4136,12 +4325,32 @@ umtx_thread_alloc(struct thread *td) /* * exec() hook. + * + * Clear robust lists for all process' threads, not delaying the + * cleanup to thread_exit hook, since the relevant address space is + * destroyed right now. */ static void -umtx_exec_hook(void *arg __unused, struct proc *p __unused, - struct image_params *imgp __unused) +umtx_exec_hook(void *arg __unused, struct proc *p, + struct image_params *imgp __unused) { - umtx_thread_cleanup(curthread); + struct thread *td; + + KASSERT(p == curproc, ("need curproc")); + PROC_LOCK(p); + KASSERT((p->p_flag & P_HADTHREADS) == 0 || + (p->p_flag & P_STOPPED_SINGLE) != 0, + ("curproc must be single-threaded")); + FOREACH_THREAD_IN_PROC(p, td) { + KASSERT(td == curthread || + ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), + ("running thread %p %p", p, td)); + PROC_UNLOCK(p); + umtx_thread_cleanup(td); + PROC_LOCK(p); + td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; + } + PROC_UNLOCK(p); } /* @@ -4150,29 +4359,136 @@ umtx_exec_hook(void *arg __unused, struct proc *p __unused, void umtx_thread_exit(struct thread *td) { + umtx_thread_cleanup(td); } +static int +umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) +{ + u_long res1; +#ifdef COMPAT_FREEBSD32 + uint32_t res32; +#endif + int error; + +#ifdef COMPAT_FREEBSD32 + if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + error = fueword32((void *)ptr, &res32); + if (error == 0) + res1 = res32; + } else +#endif + { + error = fueword((void *)ptr, &res1); + } + if (error == 0) + *res = res1; + else + error = EFAULT; + return (error); +} + +static void +umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) +{ +#ifdef COMPAT_FREEBSD32 + struct umutex32 m32; + + if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + memcpy(&m32, m, sizeof(m32)); + *rb_list = m32.m_rb_lnk; + } else +#endif + *rb_list = m->m_rb_lnk; +} + +static int +umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) +{ + struct umutex m; + int error; + + KASSERT(td->td_proc == curproc, ("need current vmspace")); + error = copyin((void *)rbp, &m, sizeof(m)); + if (error != 0) + return (error); + if (rb_list != NULL) + umtx_read_rb_list(td, &m, rb_list); + if ((m.m_flags & UMUTEX_ROBUST) == 0) + return (EINVAL); + if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) + /* inact is cleared after unlock, allow the inconsistency */ + return (inact ? 0 : EINVAL); + return (do_unlock_umutex(td, (struct umutex *)rbp, true)); +} + +static void +umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, + const char *name) +{ + int error, i; + uintptr_t rbp; + bool inact; + + if (rb_list == 0) + return; + error = umtx_read_uptr(td, rb_list, &rbp); + for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { + if (rbp == *rb_inact) { + inact = true; + *rb_inact = 0; + } else + inact = false; + error = umtx_handle_rb(td, rbp, &rbp, inact); + } + if (i == umtx_max_rb && umtx_verbose_rb) { + uprintf("comm %s pid %d: reached umtx %smax rb %d\n", + td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); + } + if (error != 0 && umtx_verbose_rb) { + uprintf("comm %s pid %d: handling %srb error %d\n", + td->td_proc->p_comm, td->td_proc->p_pid, name, error); + } +} + /* - * clean up umtx data. + * Clean up umtx data. */ static void umtx_thread_cleanup(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; + uintptr_t rb_inact; - if ((uq = td->td_umtxq) == NULL) - return; - - mtx_lock(&umtx_lock); - uq->uq_inherited_pri = PRI_MAX; - while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { - pi->pi_owner = NULL; - TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); + /* + * Disown pi mutexes. + */ + uq = td->td_umtxq; + if (uq != NULL) { + mtx_lock(&umtx_lock); + uq->uq_inherited_pri = PRI_MAX; + while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { + pi->pi_owner = NULL; + TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); + } + mtx_unlock(&umtx_lock); + thread_lock(td); + sched_lend_user_prio(td, PRI_MAX); + thread_unlock(td); } - mtx_unlock(&umtx_lock); - thread_lock(td); - sched_lend_user_prio(td, PRI_MAX); - thread_unlock(td); + + /* + * Handle terminated robust mutexes. Must be done after + * robust pi disown, otherwise unlock could see unowned + * entries. + */ + rb_inact = td->td_rb_inact; + if (rb_inact != 0) + (void)umtx_read_uptr(td, rb_inact, &rb_inact); + umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); + umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); + if (rb_inact != 0) + (void)umtx_handle_rb(td, rb_inact, NULL, true); } diff --git a/sys/sys/_umtx.h b/sys/sys/_umtx.h index e8a400fdf8cd..d94f86b0f566 100644 --- a/sys/sys/_umtx.h +++ b/sys/sys/_umtx.h @@ -37,7 +37,11 @@ struct umutex { volatile __lwpid_t m_owner; /* Owner of the mutex */ __uint32_t m_flags; /* Flags of the mutex */ __uint32_t m_ceilings[2]; /* Priority protect ceiling */ - __uint32_t m_spare[4]; + __uintptr_t m_rb_lnk; /* Robust linkage */ +#ifndef __LP64__ + __uint32_t m_pad; +#endif + __uint32_t m_spare[2]; }; struct ucond { diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 35303fc74f9c..1cb34bfe0bb3 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -282,6 +282,9 @@ struct thread { int td_no_sleeping; /* (k) Sleeping disabled count. */ int td_dom_rr_idx; /* (k) RR Numa domain selection. */ void *td_su; /* (k) FFS SU private */ + uintptr_t td_rb_list; /* (k) Robust list head. */ + uintptr_t td_rbp_list; /* (k) Robust priv list head. */ + uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ diff --git a/sys/sys/umtx.h b/sys/sys/umtx.h index 325148c61449..d2ff5acd0b74 100644 --- a/sys/sys/umtx.h +++ b/sys/sys/umtx.h @@ -32,13 +32,26 @@ #include <sys/_umtx.h> +/* Common lock flags */ #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ -#define UMUTEX_UNOWNED 0x0 -#define UMUTEX_CONTESTED 0x80000000U - +/* umutex flags */ #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ +#define UMUTEX_ROBUST 0x0010 /* Robust mutex */ +#define UMUTEX_NONCONSISTENT 0x0020 /* Robust locked but not consistent */ + +/* + * The umutex.m_lock values and bits. The m_owner is the word which + * serves as the lock. Its high bit is the contention indicator and + * rest of bits records the owner TID. TIDs values start with PID_MAX + * + 2 and end by INT32_MAX. The low range [1..PID_MAX] is guaranteed + * to be useable as the special markers. + */ +#define UMUTEX_UNOWNED 0x0 +#define UMUTEX_CONTESTED 0x80000000U +#define UMUTEX_RB_OWNERDEAD (UMUTEX_CONTESTED | 0x10) +#define UMUTEX_RB_NOTRECOV (UMUTEX_CONTESTED | 0x11) /* urwlock flags */ #define URWLOCK_PREFER_READER 0x0002 @@ -84,6 +97,7 @@ #define UMTX_OP_SEM2_WAIT 23 #define UMTX_OP_SEM2_WAKE 24 #define UMTX_OP_SHM 25 +#define UMTX_OP_ROBUST_LISTS 26 /* Flags for UMTX_OP_CV_WAIT */ #define CVWAIT_CHECK_UNPARKING 0x01 @@ -100,6 +114,12 @@ #define UMTX_SHM_DESTROY 0x0004 #define UMTX_SHM_ALIVE 0x0008 +struct umtx_robust_lists_params { + uintptr_t robust_list_offset; + uintptr_t robust_priv_list_offset; + uintptr_t robust_inact_offset; +}; + #ifndef _KERNEL int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); @@ -122,6 +142,8 @@ enum { TYPE_RWLOCK, TYPE_FUTEX, TYPE_SHM, + TYPE_PI_ROBUST_UMUTEX, + TYPE_PP_ROBUST_UMUTEX, }; /* Key to represent a unique userland synchronous object */ diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 4a0479b03bd4..cf6eda82d722 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -476,7 +476,7 @@ vm_object_vndeallocate(vm_object_t object) } #endif - if (object->ref_count == 1) + if (!umtx_shm_vnobj_persistent && object->ref_count == 1) umtx_shm_object_terminated(object); /* diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index c68fdce09158..cfc583b8db01 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -300,6 +300,7 @@ vm_object_cache_is_empty(vm_object_t object) void umtx_shm_object_init(vm_object_t object); void umtx_shm_object_terminated(vm_object_t object); +extern int umtx_shm_vnobj_persistent; vm_object_t vm_object_allocate (objtype_t, vm_pindex_t); boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t, diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index f39afc2f951d..cc439766c967 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -164,6 +164,7 @@ vnode_destroy_vobject(struct vnode *vp) return; ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject"); VM_OBJECT_WLOCK(obj); + umtx_shm_object_terminated(obj); if (obj->ref_count == 0) { /* * don't double-terminate the object |