aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJamie Gritton <jamie@FreeBSD.org>2021-02-21 18:55:44 +0000
committerJamie Gritton <jamie@FreeBSD.org>2021-02-21 18:55:44 +0000
commitf7496dcab0360a74bfb00cd6118f66323fffda61 (patch)
tree3d2905671ada32ea912fac4d913fbcc430628b6f
parentb963ce4588b33f733aef3f7a7f3fbe5018a89728 (diff)
downloadsrc-f7496dcab0360a74bfb00cd6118f66323fffda61.tar.gz
src-f7496dcab0360a74bfb00cd6118f66323fffda61.zip
jail: Change the locking around pr_ref and pr_uref
Require both the prison mutex and allprison_lock when pr_ref or pr_uref go to/from zero. Adding a non-first or removing a non-last reference remain lock-free. This means that a shared hold on allprison_lock is sufficient for prison_isalive() to be useful, which removes a number of cases of lock/check/unlock on the prison mutex. Expand the locking in kern_jail_set() to keep allprison_lock held exclusive until the new prison is valid, thus making invalid prisons invisible to any thread holding allprison_lock (except of course the one creating or destroying the prison). This renders prison_isvalid() nearly redundant, now used only in asserts. Differential Revision: https://reviews.freebsd.org/D28419 Differential Revision: https://reviews.freebsd.org/D28458
-rw-r--r--sys/kern/kern_jail.c423
-rw-r--r--sys/kern/sysv_msg.c2
-rw-r--r--sys/kern/sysv_sem.c2
-rw-r--r--sys/kern/sysv_shm.c2
-rw-r--r--sys/kern/uipc_mqueue.c35
-rw-r--r--sys/sys/jail.h3
6 files changed, 232 insertions, 235 deletions
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 65201eb12951..48c91a95bf1a 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -137,9 +137,11 @@ LIST_HEAD(, prison_racct) allprison_racct;
int lastprid = 0;
static int get_next_prid(struct prison **insprp);
-static int do_jail_attach(struct thread *td, struct prison *pr);
+static int do_jail_attach(struct thread *td, struct prison *pr, int drflags);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
+static int prison_lock_xlock(struct prison *pr, int flags);
+static void prison_free_not_last(struct prison *pr);
static void prison_set_allow_locked(struct prison *pr, unsigned flag,
int enable);
static char *prison_path(struct prison *pr1, struct prison *pr2);
@@ -1006,18 +1008,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* where it can be inserted later.
*/
TAILQ_FOREACH(inspr, &allprison, pr_list) {
- if (inspr->pr_id == jid) {
- mtx_lock(&inspr->pr_mtx);
- if (prison_isvalid(inspr)) {
- pr = inspr;
- drflags |= PD_LOCKED;
- inspr = NULL;
- } else
- mtx_unlock(&inspr->pr_mtx);
- break;
- }
+ if (inspr->pr_id < jid)
+ continue;
if (inspr->pr_id > jid)
break;
+ pr = inspr;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ inspr = NULL;
+ break;
}
if (pr != NULL) {
ppr = pr->pr_parent;
@@ -1041,13 +1040,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = ENOENT;
vfs_opterror(opts, "jail %d not found", jid);
goto done_deref;
- } else if (!prison_isalive(pr)) {
+ }
+ if (!prison_isalive(pr)) {
if (!(flags & JAIL_DYING)) {
error = ENOENT;
vfs_opterror(opts, "jail %d is dying",
jid);
goto done_deref;
- } else if ((flags & JAIL_ATTACH) ||
+ }
+ if ((flags & JAIL_ATTACH) ||
(pr_flags & PR_PERSIST)) {
/*
* A dying jail might be resurrected
@@ -1121,12 +1122,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
if (namelc[0] != '\0') {
pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
- name_again:
deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
if (tpr != pr &&
!strcmp(tpr->pr_name + pnamelen, namelc)) {
- mtx_lock(&tpr->pr_mtx);
if (prison_isalive(tpr)) {
if (pr == NULL &&
cuflags != JAIL_CREATE) {
@@ -1135,6 +1134,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* for updates.
*/
pr = tpr;
+ mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
break;
}
@@ -1144,28 +1144,22 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* active sibling jail.
*/
error = EEXIST;
- mtx_unlock(&tpr->pr_mtx);
vfs_opterror(opts,
"jail \"%s\" already exists",
name);
goto done_deref;
}
if (pr == NULL &&
- cuflags != JAIL_CREATE &&
- prison_isvalid(tpr))
+ cuflags != JAIL_CREATE) {
deadpr = tpr;
- mtx_unlock(&tpr->pr_mtx);
+ }
}
}
/* If no active jail is found, use a dying one. */
if (deadpr != NULL && pr == NULL) {
if (flags & JAIL_DYING) {
- mtx_lock(&deadpr->pr_mtx);
- if (!prison_isvalid(deadpr)) {
- mtx_unlock(&deadpr->pr_mtx);
- goto name_again;
- }
pr = deadpr;
+ mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
} else if (cuflags == JAIL_UPDATE) {
error = ENOENT;
@@ -1199,19 +1193,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
vfs_opterror(opts, "prison limit exceeded");
goto done_deref;
}
- mtx_lock(&ppr->pr_mtx);
- if (!prison_isvalid(ppr)) {
- mtx_unlock(&ppr->pr_mtx);
- error = ENOENT;
- vfs_opterror(opts, "jail \"%s\" not found",
- prison_name(mypr, ppr));
- goto done_deref;
- }
prison_hold(ppr);
- if (refcount_acquire(&ppr->pr_uref))
- mtx_unlock(&ppr->pr_mtx);
- else {
+ if (!refcount_acquire_if_not_zero(&ppr->pr_uref)) {
/* This brings the parent back to life. */
+ mtx_lock(&ppr->pr_mtx);
+ refcount_acquire(&ppr->pr_uref);
mtx_unlock(&ppr->pr_mtx);
error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
if (error) {
@@ -1219,7 +1205,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
drflags |= PD_DEREF | PD_DEUREF;
goto done_deref;
}
- }
+ }
if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
error = EAGAIN;
@@ -1230,6 +1216,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
+ refcount_init(&pr->pr_ref, 0);
+ refcount_init(&pr->pr_uref, 0);
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
@@ -1452,7 +1440,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- refcount_load(&tpr->pr_uref) == 0) {
+ !prison_isalive(tpr)) {
descend = 0;
continue;
}
@@ -1520,7 +1508,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- refcount_load(&tpr->pr_uref) == 0) {
+ !prison_isalive(tpr)) {
descend = 0;
continue;
}
@@ -1759,8 +1747,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
prison_hold(pr);
refcount_acquire(&pr->pr_uref);
} else {
- refcount_release(&pr->pr_ref);
drflags |= PD_DEUREF;
+ prison_free_not_last(pr);
}
}
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
@@ -1824,8 +1812,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#endif
/* Let the modules do their work. */
- sx_downgrade(&allprison_lock);
- drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED;
if (born) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
if (error) {
@@ -1842,9 +1828,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
- mtx_lock(&pr->pr_mtx);
- error = do_jail_attach(td, pr);
- drflags &= ~PD_LIST_SLOCKED;
+ error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
+ drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
if (error) {
if (created) {
/* do_jail_attach has removed the prison. */
@@ -1857,9 +1842,9 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef RACCT
if (racct_enable && !created) {
- if (drflags & PD_LIST_SLOCKED) {
- sx_sunlock(&allprison_lock);
- drflags &= ~PD_LIST_SLOCKED;
+ if (drflags & PD_LIST_XLOCKED) {
+ sx_xunlock(&allprison_lock);
+ drflags &= ~PD_LIST_XLOCKED;
}
prison_racct_modify(pr);
}
@@ -1874,8 +1859,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* not be publicly visible).
*/
if (pr_flags & PR_PERSIST) {
- mtx_lock(&pr->pr_mtx);
- drflags |= PD_LOCKED;
+ drflags = prison_lock_xlock(pr, drflags);
refcount_acquire(&pr->pr_ref);
refcount_acquire(&pr->pr_uref);
} else {
@@ -1952,13 +1936,8 @@ get_next_prid(struct prison **insprp)
TAILQ_FOREACH(inspr, &allprison, pr_list) {
if (inspr->pr_id < jid)
continue;
- if (inspr->pr_id > jid ||
- refcount_load(&inspr->pr_ref) == 0) {
- /*
- * Found an opening. This may be a gap
- * in the list, or a dead jail with the
- * same ID.
- */
+ if (inspr->pr_id > jid) {
+ /* Found an opening. */
maxid = 0;
break;
}
@@ -2047,18 +2026,14 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
- if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
+ if (pr->pr_id > jid &&
+ ((flags & JAIL_DYING) || prison_isalive(pr)) &&
+ prison_ischild(mypr, pr)) {
mtx_lock(&pr->pr_mtx);
- if ((flags & JAIL_DYING)
- ? prison_isvalid(pr) : prison_isalive(pr))
- break;
- mtx_unlock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ goto found_prison;
}
}
- if (pr != NULL) {
- drflags |= PD_LOCKED;
- goto found_prison;
- }
error = ENOENT;
vfs_opterror(opts, "no jail after %d", jid);
goto done;
@@ -2314,7 +2289,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
int
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
{
- struct prison *pr, *cpr, *lpr, *tpr;
+ struct prison *pr, *cpr, *lpr;
int descend, error;
error = priv_check(td, PRIV_JAIL_REMOVE);
@@ -2334,21 +2309,13 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
mtx_unlock(&pr->pr_mtx);
lpr = NULL;
FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
- mtx_lock(&cpr->pr_mtx);
- if (prison_isvalid(cpr)) {
- tpr = cpr;
- prison_hold(cpr);
- } else {
- /* Already removed - do not do it again. */
- tpr = NULL;
- }
- mtx_unlock(&cpr->pr_mtx);
+ prison_hold(cpr);
if (lpr != NULL) {
mtx_lock(&lpr->pr_mtx);
prison_remove_one(lpr);
sx_xlock(&allprison_lock);
}
- lpr = tpr;
+ lpr = cpr;
}
if (lpr != NULL) {
mtx_lock(&lpr->pr_mtx);
@@ -2377,8 +2344,8 @@ prison_remove_one(struct prison *pr)
/* If the prison was persistent, it is not anymore. */
if (pr->pr_flags & PR_PERSIST) {
- refcount_release(&pr->pr_ref);
drflags |= PD_DEUREF;
+ prison_free_not_last(pr);
pr->pr_flags &= ~PR_PERSIST;
}
@@ -2428,14 +2395,7 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
if (error)
return (error);
- /*
- * Start with exclusive hold on allprison_lock to ensure that a possible
- * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
- * But then immediately downgrade it since we don't need to stop
- * readers.
- */
- sx_xlock(&allprison_lock);
- sx_downgrade(&allprison_lock);
+ sx_slock(&allprison_lock);
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
if (pr == NULL) {
sx_sunlock(&allprison_lock);
@@ -2449,16 +2409,18 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
return (EINVAL);
}
- return (do_jail_attach(td, pr));
+ return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
static int
-do_jail_attach(struct thread *td, struct prison *pr)
+do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
struct proc *p;
struct ucred *newcred, *oldcred;
int error;
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ sx_assert(&allprison_lock, SX_LOCKED);
/*
* XXX: Note that there is a slight race here if two threads
* in the same privileged process attempt to attach to two
@@ -2469,15 +2431,18 @@ do_jail_attach(struct thread *td, struct prison *pr)
*/
refcount_acquire(&pr->pr_ref);
refcount_acquire(&pr->pr_uref);
+ drflags |= PD_DEREF | PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
+ drflags &= ~PD_LOCKED;
/* Let modules do whatever they need to prepare for attaching. */
error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
if (error) {
- prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
+ prison_deref(pr, drflags);
return (error);
}
- sx_sunlock(&allprison_lock);
+ sx_unlock(&allprison_lock);
+ drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED);
/*
* Reparent the newly attached process to this jail.
@@ -2513,7 +2478,7 @@ do_jail_attach(struct thread *td, struct prison *pr)
rctl_proc_ucred_changed(p, newcred);
crfree(newcred);
#endif
- prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
+ prison_deref(oldcred->cr_prison, drflags);
crfree(oldcred);
/*
@@ -2533,8 +2498,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
e_revert_osd:
/* Tell modules this thread is still in its old jail after all. */
sx_slock(&allprison_lock);
+ drflags |= PD_LIST_SLOCKED;
(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
- prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
+ prison_deref(pr, drflags);
return (error);
}
@@ -2548,19 +2514,13 @@ prison_find(int prid)
sx_assert(&allprison_lock, SX_LOCKED);
TAILQ_FOREACH(pr, &allprison, pr_list) {
- if (pr->pr_id == prid) {
- mtx_lock(&pr->pr_mtx);
- if (prison_isvalid(pr))
- return (pr);
- /*
- * Any active prison with the same ID would have
- * been inserted before a dead one.
- */
- mtx_unlock(&pr->pr_mtx);
- break;
- }
+ if (pr->pr_id < prid)
+ continue;
if (pr->pr_id > prid)
break;
+ KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr));
+ mtx_lock(&pr->pr_mtx);
+ return (pr);
}
return (NULL);
}
@@ -2577,10 +2537,10 @@ prison_find_child(struct prison *mypr, int prid)
sx_assert(&allprison_lock, SX_LOCKED);
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
if (pr->pr_id == prid) {
+ KASSERT(prison_isvalid(pr),
+ ("Found invalid prison %p", pr));
mtx_lock(&pr->pr_mtx);
- if (prison_isvalid(pr))
- return (pr);
- mtx_unlock(&pr->pr_mtx);
+ return (pr);
}
}
return (NULL);
@@ -2598,26 +2558,21 @@ prison_find_name(struct prison *mypr, const char *name)
sx_assert(&allprison_lock, SX_LOCKED);
mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
- again:
deadpr = NULL;
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
if (!strcmp(pr->pr_name + mylen, name)) {
- mtx_lock(&pr->pr_mtx);
- if (prison_isalive(pr))
+ KASSERT(prison_isvalid(pr),
+ ("Found invalid prison %p", pr));
+ if (prison_isalive(pr)) {
+ mtx_lock(&pr->pr_mtx);
return (pr);
- if (prison_isvalid(pr))
- deadpr = pr;
- mtx_unlock(&pr->pr_mtx);
+ }
+ deadpr = pr;
}
}
/* There was no valid prison - perhaps there was a dying one. */
- if (deadpr != NULL) {
+ if (deadpr != NULL)
mtx_lock(&deadpr->pr_mtx);
- if (!prison_isvalid(deadpr)) {
- mtx_unlock(&deadpr->pr_mtx);
- goto again;
- }
- }
return (deadpr);
}
@@ -2671,45 +2626,53 @@ prison_hold(struct prison *pr)
/*
* Remove a prison reference. If that was the last reference, the
- * prison will be removed (at a later time). Return with the prison
- * unlocked.
+ * prison will be removed (at a later time).
*/
void
prison_free_locked(struct prison *pr)
{
- int lastref;
mtx_assert(&pr->pr_mtx, MA_OWNED);
+ /*
+ * Locking is no longer required, but unlock because the caller
+ * expects it.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ prison_free(pr);
+}
+
+void
+prison_free(struct prison *pr)
+{
+
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
- lastref = refcount_release(&pr->pr_ref);
- mtx_unlock(&pr->pr_mtx);
- if (lastref) {
+ if (!refcount_release_if_not_last(&pr->pr_ref)) {
/*
- * Don't remove the prison itself in this context,
+ * Don't remove the last reference in this context,
* in case there are locks held.
*/
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
-void
-prison_free(struct prison *pr)
+static void
+prison_free_not_last(struct prison *pr)
{
+#ifdef INVARIANTS
+ int lastref;
- /*
- * Locking is only required when releasing the last reference.
- * This allows assurance that a locked prison will remain valid
- * until it is unlocked.
- */
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
- if (refcount_release_if_not_last(&pr->pr_ref))
- return;
- mtx_lock(&pr->pr_mtx);
- prison_free_locked(pr);
+ lastref = refcount_release(&pr->pr_ref);
+ KASSERT(!lastref,
+ ("prison_free_not_last freed last ref on prison %p (jid=%d).",
+ pr, pr->pr_id));
+#else
+	refcount_release(&pr->pr_ref);
+#endif
}
/*
@@ -2718,7 +2681,8 @@ prison_free(struct prison *pr)
 * user-visible, except through the jail system calls.  It is also
* an error to hold an invalid prison. A prison record will remain
* alive as long as it has at least one user reference, and will not
- * be set to the dying state as long as the prison mutex is held.
+ * be set to the dying state until the prison mutex and allprison_lock
+ * are both freed.
*/
void
prison_proc_hold(struct prison *pr)
@@ -2756,7 +2720,7 @@ prison_proc_free(struct prison *pr)
* but also half dead. Add a reference so any calls to
* prison_free() won't re-submit the task.
*/
- refcount_acquire(&pr->pr_ref);
+ prison_hold(pr);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
@@ -2768,18 +2732,18 @@ static void
prison_complete(void *context, int pending)
{
struct prison *pr = context;
+ int drflags;
- sx_xlock(&allprison_lock);
- mtx_lock(&pr->pr_mtx);
/*
- * If this is completing a call to prison_proc_free, there will still
- * be a user reference held; clear that as well as the reference that
- * was added. No references are expected if this is completing a call
- * to prison_free, but prison_deref is still called for the cleanup.
+ * This could be called to release the last reference, or the
+ * last user reference; the existence of a user reference implies
+ * the latter. There will always be a reference to remove, as
+ * prison_proc_free adds one.
*/
- prison_deref(pr, refcount_load(&pr->pr_uref) > 0
- ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
- : PD_LOCKED | PD_LIST_XLOCKED);
+ drflags = prison_lock_xlock(pr, PD_DEREF);
+ if (refcount_load(&pr->pr_uref) > 0)
+ drflags |= PD_DEUREF;
+ prison_deref(pr, drflags);
}
/*
@@ -2794,84 +2758,86 @@ static void
prison_deref(struct prison *pr, int flags)
{
struct prisonlist freeprison;
- struct prison *rpr, *tpr;
- int lastref, lasturef;
+ struct prison *rpr, *ppr, *tpr;
TAILQ_INIT(&freeprison);
- if (!(flags & PD_LOCKED))
- mtx_lock(&pr->pr_mtx);
/*
* Release this prison as requested, which may cause its parent
* to be released, and then maybe its grandparent, etc.
*/
for (;;) {
if (flags & PD_DEUREF) {
+ /* Drop a user reference. */
KASSERT(refcount_load(&pr->pr_uref) > 0,
("prison_deref PD_DEUREF on a dead prison (jid=%d)",
pr->pr_id));
- lasturef = refcount_release(&pr->pr_uref);
- if (lasturef)
- refcount_acquire(&pr->pr_ref);
- KASSERT(refcount_load(&prison0.pr_uref) > 0,
- ("prison0 pr_uref=0"));
- } else
- lasturef = 0;
+ if (!refcount_release_if_not_last(&pr->pr_uref)) {
+ if (!(flags & PD_DEREF)) {
+ prison_hold(pr);
+ flags |= PD_DEREF;
+ }
+ flags = prison_lock_xlock(pr, flags);
+ if (refcount_release(&pr->pr_uref)) {
+ /*
+ * When the last user references goes,
+ * this becomes a dying prison.
+ */
+ KASSERT(
+ refcount_load(&prison0.pr_uref) > 0,
+ ("prison0 pr_uref=0"));
+ mtx_unlock(&pr->pr_mtx);
+ flags &= ~PD_LOCKED;
+ (void)osd_jail_call(pr,
+ PR_METHOD_REMOVE, NULL);
+ }
+ }
+ }
if (flags & PD_DEREF) {
+ /* Drop a reference. */
KASSERT(refcount_load(&pr->pr_ref) > 0,
("prison_deref PD_DEREF on a dead prison (jid=%d)",
pr->pr_id));
- lastref = refcount_release(&pr->pr_ref);
- }
- else
- lastref = refcount_load(&pr->pr_ref) == 0;
- mtx_unlock(&pr->pr_mtx);
-
- /*
- * Tell the modules if the last user reference was removed
- * (even it sticks around in dying state).
- */
- if (lasturef) {
- if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
- if (atomic_load_acq_int(&pr->pr_ref) > 1) {
- sx_slock(&allprison_lock);
- flags |= PD_LIST_SLOCKED;
- } else {
- sx_xlock(&allprison_lock);
- flags |= PD_LIST_XLOCKED;
+ if (!refcount_release_if_not_last(&pr->pr_ref)) {
+ flags = prison_lock_xlock(pr, flags);
+ if (refcount_release(&pr->pr_ref)) {
+ /*
+ * When the last reference goes,
+ * unlink the prison and set it aside.
+ */
+ KASSERT(
+ refcount_load(&pr->pr_uref) == 0,
+ ("prison_deref: last ref, "
+ "but still has %d urefs (jid=%d)",
+ pr->pr_uref, pr->pr_id));
+ KASSERT(
+ refcount_load(&prison0.pr_ref) != 0,
+ ("prison0 pr_ref=0"));
+ TAILQ_REMOVE(&allprison, pr, pr_list);
+ LIST_REMOVE(pr, pr_sibling);
+ TAILQ_INSERT_TAIL(&freeprison, pr,
+ pr_list);
+ for (ppr = pr->pr_parent;
+ ppr != NULL;
+ ppr = ppr->pr_parent)
+ ppr->pr_childcount--;
+ /*
+ * Removing a prison frees references
+ * from its parent.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ flags &= ~PD_LOCKED;
+ pr = pr->pr_parent;
+ flags |= PD_DEREF | PD_DEUREF;
+ continue;
}
}
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
- mtx_lock(&pr->pr_mtx);
- lastref = refcount_release(&pr->pr_ref);
- mtx_unlock(&pr->pr_mtx);
}
-
- if (!lastref)
- break;
-
- if (flags & PD_LIST_SLOCKED) {
- if (!sx_try_upgrade(&allprison_lock)) {
- sx_sunlock(&allprison_lock);
- sx_xlock(&allprison_lock);
- }
- flags &= ~PD_LIST_SLOCKED;
- } else if (!(flags & PD_LIST_XLOCKED))
- sx_xlock(&allprison_lock);
- flags |= PD_LIST_XLOCKED;
-
- TAILQ_REMOVE(&allprison, pr, pr_list);
- LIST_REMOVE(pr, pr_sibling);
- TAILQ_INSERT_TAIL(&freeprison, pr, pr_list);
- for (tpr = pr->pr_parent; tpr != NULL; tpr = tpr->pr_parent)
- tpr->pr_childcount--;
-
- /* Removing a prison frees a reference on its parent. */
- pr = pr->pr_parent;
- mtx_lock(&pr->pr_mtx);
- flags |= PD_DEREF | PD_DEUREF;
+ break;
}
/* Release all the prison locks. */
+ if (flags & PD_LOCKED)
+ mtx_unlock(&pr->pr_mtx);
if (flags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (flags & PD_LIST_XLOCKED)
@@ -2902,11 +2868,48 @@ prison_deref(struct prison *pr, int flags)
if (racct_enable)
prison_racct_detach(rpr);
#endif
+ TAILQ_REMOVE(&freeprison, rpr, pr_list);
free(rpr, M_PRISON);
}
}
/*
+ * Given the current locking state in the flags, make sure allprison_lock
+ * is held exclusive, and the prison is locked. Return flags indicating
+ * the new state.
+ */
+static int
+prison_lock_xlock(struct prison *pr, int flags)
+{
+
+ if (!(flags & PD_LIST_XLOCKED)) {
+ /*
+ * Get allprison_lock, which may be an upgrade,
+ * and may require unlocking the prison.
+ */
+ if (flags & PD_LOCKED) {
+ mtx_unlock(&pr->pr_mtx);
+ flags &= ~PD_LOCKED;
+ }
+ if (flags & PD_LIST_SLOCKED) {
+ if (!sx_try_upgrade(&allprison_lock)) {
+ sx_sunlock(&allprison_lock);
+ sx_xlock(&allprison_lock);
+ }
+ flags &= ~PD_LIST_SLOCKED;
+ } else
+ sx_xlock(&allprison_lock);
+ flags |= PD_LIST_XLOCKED;
+ }
+ if (!(flags & PD_LOCKED)) {
+ /* Lock the prison mutex. */
+ mtx_lock(&pr->pr_mtx);
+ flags |= PD_LOCKED;
+ }
+	return (flags);
+}
+
+/*
* Set or clear a permission bit in the pr_allow field, passing restrictions
* (cleared permission) down to child jails.
*/
@@ -3068,15 +3071,13 @@ prison_ischild(struct prison *pr1, struct prison *pr2)
}
/*
- * Return true if the prison is currently alive. A prison is alive if it is
- * valid and holds user references, and it isn't being removed.
+ * Return true if the prison is currently alive. A prison is alive if it
+ * holds user references and it isn't being removed.
*/
bool
prison_isalive(struct prison *pr)
{
- if (__predict_false(refcount_load(&pr->pr_ref) == 0))
- return (false);
if (__predict_false(refcount_load(&pr->pr_uref) == 0))
return (false);
if (__predict_false(pr->pr_flags & PR_REMOVE))
@@ -3087,7 +3088,9 @@ prison_isalive(struct prison *pr)
/*
* Return true if the prison is currently valid. A prison is valid if it has
* been fully created, and is not being destroyed. Note that dying prisons
- * are still considered valid.
+ * are still considered valid. Invalid prisons won't be found under normal
+ * circumstances, as they're only put in that state by functions that have
+ * an exclusive hold on allprison_lock.
*/
bool
prison_isvalid(struct prison *pr)
@@ -3754,10 +3757,6 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
cpr->pr_ip6s * sizeof(struct in6_addr));
}
#endif
- if (!prison_isvalid(cpr)) {
- mtx_unlock(&cpr->pr_mtx);
- continue;
- }
bzero(xp, sizeof(*xp));
xp->pr_version = XPRISON_VERSION;
xp->pr_id = cpr->pr_id;
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
index f048234f3a33..435235f0384d 100644
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -290,7 +290,7 @@ msginit()
if (rsv == NULL)
rsv = osd_reserve(msg_prison_slot);
prison_lock(pr);
- if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
+ if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
&prison0);
rsv = NULL;
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
index deee60d87a5a..dd8925246d1e 100644
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -321,7 +321,7 @@ seminit(void)
if (rsv == NULL)
rsv = osd_reserve(sem_prison_slot);
prison_lock(pr);
- if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
+ if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
&prison0);
rsv = NULL;
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index ad5f0030b965..2e7ae927dcc3 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -979,7 +979,7 @@ shminit(void)
if (rsv == NULL)
rsv = osd_reserve(shm_prison_slot);
prison_lock(pr);
- if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
+ if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
&prison0);
rsv = NULL;
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index dc94ce213d08..5c1775a261fc 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -1564,29 +1564,26 @@ mqfs_prison_remove(void *obj, void *data __unused)
const struct prison *pr = obj;
struct prison *tpr;
struct mqfs_node *pn, *tpn;
- int found;
+ struct vnode *pr_root;
- found = 0;
+ pr_root = pr->pr_root;
+ if (pr->pr_parent->pr_root == pr_root)
+ return (0);
TAILQ_FOREACH(tpr, &allprison, pr_list) {
- prison_lock(tpr);
- if (tpr != pr && prison_isvalid(tpr) &&
- tpr->pr_root == pr->pr_root)
- found = 1;
- prison_unlock(tpr);
+ if (tpr != pr && tpr->pr_root == pr_root)
+ return (0);
}
- if (!found) {
- /*
- * No jails are rooted in this directory anymore,
- * so no queues should be either.
- */
- sx_xlock(&mqfs_data.mi_lock);
- LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
- mn_sibling, tpn) {
- if (pn->mn_pr_root == pr->pr_root)
- (void)do_unlink(pn, curthread->td_ucred);
- }
- sx_xunlock(&mqfs_data.mi_lock);
+ /*
+ * No jails are rooted in this directory anymore,
+ * so no queues should be either.
+ */
+ sx_xlock(&mqfs_data.mi_lock);
+ LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
+ mn_sibling, tpn) {
+ if (pn->mn_pr_root == pr_root)
+ (void)do_unlink(pn, curthread->td_ucred);
}
+ sx_xunlock(&mqfs_data.mi_lock);
return (0);
}
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 2ac6aabdbd43..a48e189729dc 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -155,7 +155,8 @@ struct prison_racct;
* (m) locked by pr_mtx
* (p) locked by pr_mtx, and also at least shared allprison_lock required
* to update
- * (r) atomic via refcount(9), pr_mtx required to decrement to zero
+ * (r) atomic via refcount(9), pr_mtx and allprison_lock required to
+ * decrement to zero
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */