aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJamie Gritton <jamie@FreeBSD.org>2021-01-20 23:08:27 +0000
committerJamie Gritton <jamie@FreeBSD.org>2021-01-20 23:08:27 +0000
commit6754ae2572eb20dbc23d2452b5e8fe4e07125f93 (patch)
tree070f256c6ded85f48cb153fbcbf4dcfd1bf10e85
parente3dd8ed77b4e7d8fda12ec80b91d89e8460b64f8 (diff)
downloadsrc-6754ae2572eb20dbc23d2452b5e8fe4e07125f93.tar.gz
src-6754ae2572eb20dbc23d2452b5e8fe4e07125f93.zip
jail: Use refcount(9) for prison references.
Use refcount(9) for both pr_ref and pr_uref in struct prison. This allows prisons to be held and freed without requiring the prison mutex. An exception to this is that dropping the last reference will still lock the prison, to keep the guarantee that a locked prison remains valid and alive (provided it was at the time it was locked). Among other things, this honors the promise made in a comment in crcopy(9), that it will not block, which hasn't been true for two decades.
-rw-r--r--sys/kern/kern_jail.c145
-rw-r--r--sys/sys/jail.h9
2 files changed, 91 insertions, 63 deletions
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 757b8bd06b89..e869bafc96b8 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -1116,7 +1116,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
name_again:
deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
- if (tpr != pr && tpr->pr_ref > 0 &&
+ if (tpr != pr &&
!strcmp(tpr->pr_name + pnamelen, namelc)) {
mtx_lock(&tpr->pr_mtx);
if (prison_isalive(tpr)) {
@@ -1199,8 +1199,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
prison_name(mypr, ppr));
goto done_deref;
}
- ppr->pr_ref++;
- ppr->pr_uref++;
+ prison_hold(ppr);
+ refcount_acquire(&ppr->pr_uref);
mtx_unlock(&ppr->pr_mtx);
if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
@@ -1315,7 +1315,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
- pr->pr_ref++;
+ prison_hold(pr);
drflags |= PD_DEREF;
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
@@ -1434,7 +1434,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- tpr->pr_uref == 0) {
+ refcount_load(&tpr->pr_uref) == 0) {
descend = 0;
continue;
}
@@ -1502,7 +1502,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- tpr->pr_uref == 0) {
+ refcount_load(&tpr->pr_uref) == 0) {
descend = 0;
continue;
}
@@ -1738,11 +1738,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
born = !prison_isalive(pr);
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
if (pr_flags & PR_PERSIST) {
- pr->pr_ref++;
- pr->pr_uref++;
+ prison_hold(pr);
+ refcount_acquire(&pr->pr_uref);
} else {
- pr->pr_ref--;
- pr->pr_uref--;
+ refcount_release(&pr->pr_uref);
+ refcount_release(&pr->pr_ref);
}
}
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
@@ -1857,8 +1857,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
if (pr_flags & PR_PERSIST) {
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
- pr->pr_ref++;
- pr->pr_uref++;
+ refcount_acquire(&pr->pr_ref);
+ refcount_acquire(&pr->pr_uref);
} else {
/* Non-persistent jails need no further changes. */
pr = NULL;
@@ -1933,7 +1933,8 @@ get_next_prid(struct prison **insprp)
TAILQ_FOREACH(inspr, &allprison, pr_list) {
if (inspr->pr_id < jid)
continue;
- if (inspr->pr_id > jid || inspr->pr_ref == 0) {
+ if (inspr->pr_id > jid ||
+ refcount_load(&inspr->pr_ref) == 0) {
/*
* Found an opening. This may be a gap
* in the list, or a dead jail with the
@@ -2096,7 +2097,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
found_prison:
/* Get the parameters of the prison. */
- pr->pr_ref++;
+ prison_hold(pr);
drflags |= PD_DEREF;
td->td_retval[0] = pr->pr_id;
error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
@@ -2309,7 +2310,7 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
}
/* Remove all descendants of this prison, then remove this prison. */
- pr->pr_ref++;
+ prison_hold(pr);
if (!LIST_EMPTY(&pr->pr_children)) {
mtx_unlock(&pr->pr_mtx);
lpr = NULL;
@@ -2317,7 +2318,7 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
mtx_lock(&cpr->pr_mtx);
if (prison_isvalid(cpr)) {
tpr = cpr;
- cpr->pr_ref++;
+ prison_hold(cpr);
} else {
/* Already removed - do not do it again. */
tpr = NULL;
@@ -2351,18 +2352,19 @@ prison_remove_one(struct prison *pr)
/* If the prison was persistent, it is not anymore. */
if (pr->pr_flags & PR_PERSIST) {
- pr->pr_ref--;
+ refcount_release(&pr->pr_ref);
drflags |= PD_DEUREF;
pr->pr_flags &= ~PR_PERSIST;
}
/*
* jail_remove added a reference. If that's the only one, remove
- * the prison now.
+ * the prison now. refcount(9) doesn't guarantee the cache coherence
+ * of non-zero counters, so force it here.
*/
- KASSERT(pr->pr_ref > 0,
+ KASSERT(refcount_load(&pr->pr_ref) > 0,
("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
- if (pr->pr_ref == 1) {
+ if (atomic_load_acq_int(&pr->pr_ref) == 1) {
prison_deref(pr, drflags);
return;
}
@@ -2440,8 +2442,8 @@ do_jail_attach(struct thread *td, struct prison *pr)
* a process root from one prison, but attached to the jail
* of another.
*/
- pr->pr_ref++;
- pr->pr_uref++;
+ refcount_acquire(&pr->pr_ref);
+ refcount_acquire(&pr->pr_uref);
mtx_unlock(&pr->pr_mtx);
/* Let modules do whatever they need to prepare for attaching. */
@@ -2614,19 +2616,21 @@ void
prison_hold_locked(struct prison *pr)
{
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- KASSERT(pr->pr_ref > 0,
- ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
- pr->pr_ref++;
+ /* Locking is no longer required. */
+ prison_hold(pr);
}
void
prison_hold(struct prison *pr)
{
+#ifdef INVARIANTS
+ int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
- mtx_lock(&pr->pr_mtx);
- prison_hold_locked(pr);
- mtx_unlock(&pr->pr_mtx);
+ KASSERT(was_valid,
+ ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
+#else
+ refcount_acquire(&pr->pr_ref);
+#endif
}
/*
@@ -2637,14 +2641,17 @@ prison_hold(struct prison *pr)
void
prison_free_locked(struct prison *pr)
{
- int ref;
+ int lastref;
mtx_assert(&pr->pr_mtx, MA_OWNED);
- ref = --pr->pr_ref;
+ KASSERT(refcount_load(&pr->pr_ref) > 0,
+ ("Trying to free dead prison %p (jid=%d).",
+ pr, pr->pr_id));
+ lastref = refcount_release(&pr->pr_ref);
mtx_unlock(&pr->pr_mtx);
- if (ref == 0) {
+ if (lastref) {
/*
- * Don't remove the last reference in this context,
+ * Don't remove the prison itself in this context,
* in case there are locks held.
*/
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
@@ -2655,6 +2662,16 @@ void
prison_free(struct prison *pr)
{
+ /*
+ * Locking is only required when releasing the last reference.
+ * This allows assurance that a locked prison will remain valid
+ * until it is unlocked.
+ */
+ KASSERT(refcount_load(&pr->pr_ref) > 0,
+ ("Trying to free dead prison %p (jid=%d).",
+ pr, pr->pr_id));
+ if (refcount_release_if_not_last(&pr->pr_ref))
+ return;
mtx_lock(&pr->pr_mtx);
prison_free_locked(pr);
}
@@ -2670,12 +2687,14 @@ prison_free(struct prison *pr)
void
prison_proc_hold(struct prison *pr)
{
+#ifdef INVARIANTS
+ int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
- mtx_lock(&pr->pr_mtx);
- KASSERT(pr->pr_uref > 0,
+ KASSERT(was_alive,
("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
- pr->pr_uref++;
- mtx_unlock(&pr->pr_mtx);
+#else
+ refcount_acquire(&pr->pr_uref);
+#endif
}
/*
@@ -2686,20 +2705,27 @@ prison_proc_hold(struct prison *pr)
void
prison_proc_free(struct prison *pr)
{
+ int lasturef;
- mtx_lock(&pr->pr_mtx);
- KASSERT(pr->pr_uref > 0,
+ /*
+ * Locking is only required when releasing the last reference.
+ * This allows assurance that a locked prison will remain alive
+ * until it is unlocked.
+ */
+ KASSERT(refcount_load(&pr->pr_uref) > 0,
("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
- if (pr->pr_uref > 1)
- pr->pr_uref--;
- else {
+ if (refcount_release_if_not_last(&pr->pr_uref))
+ return;
+ mtx_lock(&pr->pr_mtx);
+ lasturef = refcount_release(&pr->pr_uref);
+ if (lasturef) {
/*
* Don't remove the last user reference in this context,
* which is expected to be a process that is not only locked,
* but also half dead. Add a reference so any calls to
* prison_free() won't re-submit the task.
*/
- pr->pr_ref++;
+ refcount_acquire(&pr->pr_ref);
mtx_unlock(&pr->pr_mtx);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
return;
@@ -2723,7 +2749,7 @@ prison_complete(void *context, int pending)
* was added. No references are expected if this is completing a call
* to prison_free, but prison_deref is still called for the cleanup.
*/
- prison_deref(pr, pr->pr_uref > 0
+ prison_deref(pr, refcount_load(&pr->pr_uref) > 0
? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
: PD_LOCKED | PD_LIST_XLOCKED);
}
@@ -2740,29 +2766,30 @@ static void
prison_deref(struct prison *pr, int flags)
{
struct prison *ppr, *tpr;
- int ref, lasturef;
+ int lastref, lasturef;
if (!(flags & PD_LOCKED))
mtx_lock(&pr->pr_mtx);
for (;;) {
if (flags & PD_DEUREF) {
- KASSERT(pr->pr_uref > 0,
+ KASSERT(refcount_load(&pr->pr_uref) > 0,
("prison_deref PD_DEUREF on a dead prison (jid=%d)",
pr->pr_id));
- pr->pr_uref--;
- lasturef = pr->pr_uref == 0;
+ lasturef = refcount_release(&pr->pr_uref);
if (lasturef)
- pr->pr_ref++;
- KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
+ refcount_acquire(&pr->pr_ref);
+ KASSERT(refcount_load(&prison0.pr_uref) > 0,
+ ("prison0 pr_uref=0"));
} else
lasturef = 0;
if (flags & PD_DEREF) {
- KASSERT(pr->pr_ref > 0,
+ KASSERT(refcount_load(&pr->pr_ref) > 0,
("prison_deref PD_DEREF on a dead prison (jid=%d)",
pr->pr_id));
- pr->pr_ref--;
+ lastref = refcount_release(&pr->pr_ref);
}
- ref = pr->pr_ref;
+ else
+ lastref = refcount_load(&pr->pr_ref) == 0;
mtx_unlock(&pr->pr_mtx);
/*
@@ -2771,7 +2798,7 @@ prison_deref(struct prison *pr, int flags)
*/
if (lasturef) {
if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
- if (ref > 1) {
+ if (atomic_load_acq_int(&pr->pr_ref) > 1) {
sx_slock(&allprison_lock);
flags |= PD_LIST_SLOCKED;
} else {
@@ -2781,12 +2808,12 @@ prison_deref(struct prison *pr, int flags)
}
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
mtx_lock(&pr->pr_mtx);
- ref = --pr->pr_ref;
+ lastref = refcount_release(&pr->pr_ref);
mtx_unlock(&pr->pr_mtx);
}
/* If the prison still has references, nothing else to do. */
- if (ref > 0) {
+ if (!lastref) {
if (flags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (flags & PD_LIST_XLOCKED)
@@ -3008,9 +3035,9 @@ prison_isalive(struct prison *pr)
{
mtx_assert(&pr->pr_mtx, MA_OWNED);
- if (__predict_false(pr->pr_ref == 0))
+ if (__predict_false(refcount_load(&pr->pr_ref) == 0))
return (false);
- if (__predict_false(pr->pr_uref == 0))
+ if (__predict_false(refcount_load(&pr->pr_uref) == 0))
return (false);
return (true);
}
@@ -3025,7 +3052,7 @@ prison_isvalid(struct prison *pr)
{
mtx_assert(&pr->pr_mtx, MA_OWNED);
- if (__predict_false(pr->pr_ref == 0))
+ if (__predict_false(refcount_load(&pr->pr_ref) == 0))
return (false);
return (true);
}
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 67ef9347d093..2d1a26787b99 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -150,17 +150,18 @@ struct prison_racct;
*
* Lock key:
* (a) allprison_lock
+ * (c) set only during creation before the structure is shared, no mutex
+ * required to read
* (m) locked by pr_mtx
* (p) locked by pr_mtx, and also at least shared allprison_lock required
* to update
- * (c) set only during creation before the structure is shared, no mutex
- * required to read
+ * (r) atomic via refcount(9), pr_mtx required to decrement to zero
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
int pr_id; /* (c) prison id */
- int pr_ref; /* (m) refcount */
- int pr_uref; /* (m) user (alive) refcount */
+ volatile u_int pr_ref; /* (r) refcount */
+ volatile u_int pr_uref; /* (r) user (alive) refcount */
unsigned pr_flags; /* (p) PR_* flags */
LIST_HEAD(, prison) pr_children; /* (a) list of child jails */
LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */