aboutsummaryrefslogtreecommitdiff
path: root/sys/kern/kern_jaildesc.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern/kern_jaildesc.c')
-rw-r--r--sys/kern/kern_jaildesc.c412
1 files changed, 412 insertions, 0 deletions
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
new file mode 100644
index 000000000000..3f322b271400
--- /dev/null
+++ b/sys/kern/kern_jaildesc.c
@@ -0,0 +1,412 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/jaildesc.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+
+MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
+
+static fo_poll_t jaildesc_poll;
+static fo_kqfilter_t jaildesc_kqfilter;
+static fo_stat_t jaildesc_stat;
+static fo_close_t jaildesc_close;
+static fo_fill_kinfo_t jaildesc_fill_kinfo;
+static fo_cmp_t jaildesc_cmp;
+
+static struct fileops jaildesc_ops = {
+ .fo_read = invfo_rdwr,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = invfo_ioctl,
+ .fo_poll = jaildesc_poll,
+ .fo_kqfilter = jaildesc_kqfilter,
+ .fo_stat = jaildesc_stat,
+ .fo_close = jaildesc_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = jaildesc_fill_kinfo,
+ .fo_cmp = jaildesc_cmp,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
+/*
+ * Given a jail descriptor number, return its prison and/or its
+ * credential. They are returned held, and will need to be released
+ * by the caller.
+ */
+int
+jaildesc_find(struct thread *td, int fd, struct prison **prp,
+ struct ucred **ucredp)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ struct prison *pr;
+ int error;
+
+ error = fget(td, fd, &cap_no_rights, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_JAILDESC) {
+ error = EINVAL;
+ goto out;
+ }
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr == NULL || !prison_isvalid(pr)) {
+ error = ENOENT;
+ JAILDESC_UNLOCK(jd);
+ goto out;
+ }
+ if (prp != NULL) {
+ prison_hold(pr);
+ *prp = pr;
+ }
+ JAILDESC_UNLOCK(jd);
+ if (ucredp != NULL)
+ *ucredp = crhold(fp->f_cred);
+ out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Allocate a new jail decriptor, not yet associated with a prison.
+ * Return the file pointer (with a reference held) and the descriptor
+ * number.
+ */
+int
+jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ int error;
+
+ if (owning) {
+ error = priv_check(td, PRIV_JAIL_REMOVE);
+ if (error != 0)
+ return (error);
+ }
+ jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
+ error = falloc_caps(td, &fp, fdp, 0, NULL);
+ if (error != 0) {
+ free(jd, M_JAILDESC);
+ return (error);
+ }
+ finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
+ FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
+ JAILDESC_LOCK_INIT(jd);
+ knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
+ if (owning)
+ jd->jd_flags |= JDF_OWNING;
+ *fpp = fp;
+ return (0);
+}
+
+/*
+ * Assocate a jail descriptor with its prison.
+ */
+void
+jaildesc_set_prison(struct file *fp, struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ jd->jd_prison = pr;
+ LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+}
+
+/*
+ * Detach all the jail descriptors from a prison.
+ */
+void
+jaildesc_prison_cleanup(struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ while ((jd = LIST_FIRST(&pr->pr_descs))) {
+ JAILDESC_LOCK(jd);
+ LIST_REMOVE(jd, jd_list);
+ jd->jd_prison = NULL;
+ JAILDESC_UNLOCK(jd);
+ prison_free(pr);
+ }
+}
+
+/*
+ * Pass a note to all listening kqueues.
+ */
+void
+jaildesc_knote(struct prison *pr, long hint)
+{
+ struct jaildesc *jd;
+ int prison_locked;
+
+ if (!LIST_EMPTY(&pr->pr_descs)) {
+ prison_locked = mtx_owned(&pr->pr_mtx);
+ if (!prison_locked)
+ prison_lock(pr);
+ LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
+ JAILDESC_LOCK(jd);
+ if (hint == NOTE_JAIL_REMOVE) {
+ jd->jd_flags |= JDF_REMOVED;
+ if (jd->jd_flags & JDF_SELECTED) {
+ jd->jd_flags &= ~JDF_SELECTED;
+ selwakeup(&jd->jd_selinfo);
+ }
+ }
+ KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
+ JAILDESC_UNLOCK(jd);
+ }
+ if (!prison_locked)
+ prison_unlock(pr);
+ }
+}
+
+static int
+jaildesc_close(struct file *fp, struct thread *td)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+
+ jd = fp->f_data;
+ fp->f_data = NULL;
+ if (jd != NULL) {
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr != NULL) {
+ /*
+ * Free or remove the associated prison.
+ * This requires a second check after re-
+ * ordering locks. This jaildesc can remain
+ * unlocked once we have a prison reference,
+ * because that prison is the only place that
+ * still points back to it.
+ */
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+ if (jd->jd_flags & JDF_OWNING) {
+ sx_xlock(&allprison_lock);
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ /*
+ * Unlink the prison, but don't free
+ * it; that will be done as part of
+ * of prison_remove.
+ */
+ LIST_REMOVE(jd, jd_list);
+ prison_remove(pr);
+ } else {
+ prison_unlock(pr);
+ sx_xunlock(&allprison_lock);
+ }
+ } else {
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ LIST_REMOVE(jd, jd_list);
+ prison_free(pr);
+ }
+ prison_unlock(pr);
+ }
+ prison_free(pr);
+ }
+ knlist_destroy(&jd->jd_selinfo.si_note);
+ JAILDESC_LOCK_DESTROY(jd);
+ free(jd, M_JAILDESC);
+ }
+ return (0);
+}
+
+static int
+jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int revents;
+
+ revents = 0;
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_flags & JDF_REMOVED)
+ revents |= POLLHUP;
+ if (revents == 0) {
+ selrecord(td, &jd->jd_selinfo);
+ jd->jd_flags |= JDF_SELECTED;
+ }
+ JAILDESC_UNLOCK(jd);
+ return (revents);
+}
+
+static void
+jaildesc_kqops_detach(struct knote *kn)
+{
+ struct jaildesc *jd;
+
+ jd = kn->kn_fp->f_data;
+ knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
+}
+
+static int
+jaildesc_kqops_event(struct knote *kn, long hint)
+{
+ struct jaildesc *jd;
+ u_int event;
+
+ jd = kn->kn_fp->f_data;
+ if (hint == 0) {
+ /*
+ * Initial test after registration. Generate a
+ * NOTE_JAIL_REMOVE in case the prison already died
+ * before registration.
+ */
+ event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
+ } else {
+ /*
+ * Mask off extra data. In the NOTE_JAIL_CHILD case,
+ * that's everything except the NOTE_JAIL_CHILD bit
+ * itself, since a JID is any positive integer.
+ */
+ event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
+ (u_int)hint & NOTE_JAIL_CTRLMASK;
+ }
+
+ /* If the user is interested in this event, record it. */
+ if (kn->kn_sfflags & event) {
+ kn->kn_fflags |= event;
+ /* Report the created jail id or attached process id. */
+ if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
+ if (kn->kn_data != 0)
+ kn->kn_fflags |= NOTE_JAIL_MULTI;
+ kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
+ (u_int)hint & ~event;
+ }
+ }
+
+ /* Prison is gone, so flag the event as finished. */
+ if (event == NOTE_JAIL_REMOVE) {
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ if (kn->kn_fflags == 0)
+ kn->kn_flags |= EV_DROP;
+ return (1);
+ }
+
+ return (kn->kn_fflags != 0);
+}
+
+static const struct filterops jaildesc_kqops = {
+ .f_isfd = 1,
+ .f_detach = jaildesc_kqops_detach,
+ .f_event = jaildesc_kqops_event,
+};
+
+static int
+jaildesc_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct jaildesc *jd;
+
+ jd = fp->f_data;
+ switch (kn->kn_filter) {
+ case EVFILT_JAILDESC:
+ kn->kn_fop = &jaildesc_kqops;
+ kn->kn_flags |= EV_CLEAR;
+ knlist_add(&jd->jd_selinfo.si_note, kn, 0);
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+static int
+jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
+{
+ struct jaildesc *jd;
+
+ bzero(sb, sizeof(struct stat));
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_prison != NULL) {
+ sb->st_ino = jd->jd_prison->pr_id;
+ sb->st_mode = S_IFREG | S_IRWXU;
+ } else
+ sb->st_mode = S_IFREG;
+ JAILDESC_UNLOCK(jd);
+ return (0);
+}
+
+static int
+jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ struct jaildesc *jd;
+
+ jd = fp->f_data;
+ kif->kf_type = KF_TYPE_JAILDESC;
+ kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0;
+ return (0);
+}
+
+static int
+jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
+{
+ struct jaildesc *jd1, *jd2;
+ int jid1, jid2;
+
+ if (fp2->f_type != DTYPE_JAILDESC)
+ return (3);
+ jd1 = fp1->f_data;
+ JAILDESC_LOCK(jd1);
+ jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd1);
+ jd2 = fp2->f_data;
+ JAILDESC_LOCK(jd2);
+ jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd2);
+ return (kcmp_cmp(jid1, jid2));
+}