aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux/spl/spl-zone.c')
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-zone.c413
1 files changed, 385 insertions, 28 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
index b2eae5d00b10..5992957280e4 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
@@ -59,6 +59,18 @@ typedef struct zone_dataset {
char zd_dsname[]; /* name of the member dataset */
} zone_dataset_t;
+/*
+ * UID-keyed delegation.  A dataset attached to an owner UID is checked for
+ * visibility in any user namespace whose owner is that UID, which is what
+ * rootless containers need.  One zone_uid_datasets_t exists per owner UID
+ * that currently has at least one dataset attached.
+ */
+typedef struct zone_uid_datasets {
+	struct list_head zuds_list;	/* link on the zone_uid_datasets list */
+	kuid_t zuds_owner;		/* namespace owner this entry covers */
+	struct list_head zuds_datasets;	/* zone_dataset_t members (zd_list) */
+} zone_uid_datasets_t;
+
+/* Head of the per-UID entries; set up by spl_zone_init(). */
+static struct list_head zone_uid_datasets;
+
#ifdef CONFIG_USER_NS
/*
@@ -138,6 +150,18 @@ zone_datasets_lookup(unsigned int nsinum)
}
#ifdef CONFIG_USER_NS
+/*
+ * Return the UID entry whose owner equals `owner`, or NULL if there is none.
+ * The list is scanned linearly.  The callers in this file invoke this with
+ * zone_datasets_lock held.
+ */
+static zone_uid_datasets_t *
+zone_uid_datasets_lookup(kuid_t owner)
+{
+	zone_uid_datasets_t *entry;
+	zone_uid_datasets_t *match = NULL;
+
+	list_for_each_entry(entry, &zone_uid_datasets, zuds_list) {
+		if (!uid_eq(entry->zuds_owner, owner))
+			continue;
+		match = entry;
+		break;
+	}
+	return (match);
+}
+
static struct zone_dataset *
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
{
@@ -232,6 +256,62 @@ zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
EXPORT_SYMBOL(zone_dataset_attach);
+/*
+ * Attach `dataset` to the delegation list of `owner_uid`.
+ *
+ * `owner_uid` is translated to a kuid_t relative to the caller's current
+ * user namespace.  The UID entry is created on first use.
+ *
+ * Returns 0 on success, the error from the credential or dataset-name
+ * check, EINVAL when `owner_uid` has no valid kuid mapping, EEXIST when
+ * the dataset is already attached to that UID, and ENXIO when the kernel
+ * is built without CONFIG_USER_NS.
+ */
int
+zone_dataset_attach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
+{
+#ifdef CONFIG_USER_NS
+	zone_uid_datasets_t *uid_entry;
+	zone_dataset_t *ds;
+	size_t namelen;
+	kuid_t kuid;
+	int err;
+
+	/* Privilege gate first (root only, per zone_dataset_cred_check). */
+	err = zone_dataset_cred_check(cred);
+	if (err != 0)
+		return (err);
+	err = zone_dataset_name_check(dataset, &namelen);
+	if (err != 0)
+		return (err);
+
+	kuid = make_kuid(current_user_ns(), owner_uid);
+	if (!uid_valid(kuid))
+		return (EINVAL);
+
+	mutex_enter(&zone_datasets_lock);
+
+	uid_entry = zone_uid_datasets_lookup(kuid);
+	if (uid_entry != NULL) {
+		/* Known UID: refuse to attach the same dataset twice. */
+		list_for_each_entry(ds, &uid_entry->zuds_datasets, zd_list) {
+			if (ds->zd_dsnamelen != namelen)
+				continue;
+			if (strncmp(ds->zd_dsname, dataset, namelen) != 0)
+				continue;
+			mutex_exit(&zone_datasets_lock);
+			return (EEXIST);
+		}
+	} else {
+		/* First delegation for this UID: create and link its entry. */
+		uid_entry = kmem_alloc(sizeof (*uid_entry), KM_SLEEP);
+		uid_entry->zuds_owner = kuid;
+		INIT_LIST_HEAD(&uid_entry->zuds_list);
+		INIT_LIST_HEAD(&uid_entry->zuds_datasets);
+		list_add_tail(&uid_entry->zuds_list, &zone_uid_datasets);
+	}
+
+	/* Store a NUL-terminated copy of the name at the tail of the list. */
+	ds = kmem_alloc(sizeof (*ds) + namelen + 1, KM_SLEEP);
+	ds->zd_dsnamelen = namelen;
+	strlcpy(ds->zd_dsname, dataset, namelen + 1);
+	INIT_LIST_HEAD(&ds->zd_list);
+	list_add_tail(&ds->zd_list, &uid_entry->zuds_datasets);
+
+	mutex_exit(&zone_datasets_lock);
+	return (0);
+#else
+	return (ENXIO);
+#endif /* CONFIG_USER_NS */
+}
+EXPORT_SYMBOL(zone_dataset_attach_uid);
+
+int
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
{
#ifdef CONFIG_USER_NS
@@ -280,6 +360,217 @@ zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
}
EXPORT_SYMBOL(zone_dataset_detach);
+/*
+ * Detach `dataset` from the delegation list of `owner_uid`.
+ *
+ * `owner_uid` is translated relative to the caller's current user
+ * namespace.  When the last dataset of a UID is removed, the UID entry
+ * itself is unlinked and freed.
+ *
+ * Returns 0 on success, the error from the credential or dataset-name
+ * check, EINVAL when `owner_uid` has no valid kuid mapping, ENOENT when
+ * either the UID or the dataset is not attached, and ENXIO when the kernel
+ * is built without CONFIG_USER_NS.
+ */
+int
+zone_dataset_detach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
+{
+#ifdef CONFIG_USER_NS
+	zone_uid_datasets_t *uid_entry;
+	zone_dataset_t *ds;
+	zone_dataset_t *victim = NULL;
+	size_t namelen;
+	kuid_t kuid;
+	int err;
+
+	err = zone_dataset_cred_check(cred);
+	if (err != 0)
+		return (err);
+	err = zone_dataset_name_check(dataset, &namelen);
+	if (err != 0)
+		return (err);
+
+	kuid = make_kuid(current_user_ns(), owner_uid);
+	if (!uid_valid(kuid))
+		return (EINVAL);
+
+	mutex_enter(&zone_datasets_lock);
+
+	/* Locate the matching dataset record, if the UID is known at all. */
+	uid_entry = zone_uid_datasets_lookup(kuid);
+	if (uid_entry != NULL) {
+		list_for_each_entry(ds, &uid_entry->zuds_datasets, zd_list) {
+			if (ds->zd_dsnamelen == namelen &&
+			    strncmp(ds->zd_dsname, dataset, namelen) == 0) {
+				victim = ds;
+				break;
+			}
+		}
+	}
+	if (victim == NULL) {
+		mutex_exit(&zone_datasets_lock);
+		return (ENOENT);
+	}
+
+	/* Unlink before freeing; the size includes the trailing name. */
+	list_del(&victim->zd_list);
+	kmem_free(victim, sizeof (*victim) + victim->zd_dsnamelen + 1);
+
+	/* Drop the UID entry once its last dataset is gone. */
+	if (list_empty(&uid_entry->zuds_datasets)) {
+		list_del(&uid_entry->zuds_list);
+		kmem_free(uid_entry, sizeof (*uid_entry));
+	}
+
+	mutex_exit(&zone_datasets_lock);
+	return (0);
+#else
+	return (ENXIO);
+#endif /* CONFIG_USER_NS */
+}
+EXPORT_SYMBOL(zone_dataset_detach_uid);
+
+/*
+ * Lookup hook for the zoned_uid property, supplied by the ZFS module via
+ * zone_register_zoned_uid_callback().  NULL (the static default) means no
+ * callback is registered.
+ */
+static zone_get_zoned_uid_fn_t zone_get_zoned_uid_fn;
+
+/*
+ * Install `fn` as the zoned_uid lookup callback.  Any previously
+ * registered callback is replaced.
+ */
+void
+zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
+{
+	zone_get_zoned_uid_fn = fn;
+}
+EXPORT_SYMBOL(zone_register_zoned_uid_callback);
+
+/*
+ * Clear the zoned_uid lookup callback.  Afterwards the checks that rely
+ * on it behave as if no delegation information is available.
+ */
+void
+zone_unregister_zoned_uid_callback(void)
+{
+	zone_get_zoned_uid_fn = NULL;
+}
+EXPORT_SYMBOL(zone_unregister_zoned_uid_callback);
+
+#ifdef CONFIG_USER_NS
+/*
+ * Check if a dataset is the delegation root (has zoned_uid set locally).
+ *
+ * The registered callback is asked for the UID that applies to `dataset`
+ * and for the name of the dataset it treats as the root.  The answer is
+ * B_TRUE only when that UID equals `zoned_uid` and the reported root name
+ * is `dataset` itself.  Returns B_FALSE when no callback is registered.
+ */
+static boolean_t
+zone_dataset_is_zoned_uid_root(const char *dataset, uid_t zoned_uid)
+{
+	/*
+	 * Take one copy of the callback pointer so the NULL test and the
+	 * call use the same value even if
+	 * zone_unregister_zoned_uid_callback() clears the global in between.
+	 */
+	zone_get_zoned_uid_fn_t lookup_fn = zone_get_zoned_uid_fn;
+	char *root;
+	uid_t found_uid;
+	boolean_t is_root;
+
+	if (lookup_fn == NULL)
+		return (B_FALSE);
+
+	root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	/*
+	 * kmem_alloc() does not zero the buffer.  Start with an empty
+	 * string so the strcmp() below never reads uninitialised memory
+	 * should the callback return without filling `root`.
+	 */
+	root[0] = '\0';
+	found_uid = lookup_fn(dataset, root, MAXPATHLEN);
+	is_root = (found_uid == zoned_uid && strcmp(root, dataset) == 0);
+	kmem_free(root, MAXPATHLEN);
+	return (is_root);
+}
+#endif /* CONFIG_USER_NS */
+
+/*
+ * Core authorization check for zoned_uid write delegation.
+ *
+ * dataset:     dataset the operation targets.
+ * op:          operation class being requested.
+ * aux_dataset: rename destination or clone source; may be NULL, in which
+ *              case the rename/clone constraint is not evaluated.
+ *
+ * Returns ZONE_ADMIN_NOT_APPLICABLE when the caller is in the global zone,
+ * no lookup callback is registered, the callback reports UID 0 for
+ * `dataset`, or the owner of the caller's user namespace is not the
+ * reported UID.  Returns ZONE_ADMIN_DENIED when the caller lacks the
+ * capability required for `op` or an operation-specific constraint fails,
+ * and ZONE_ADMIN_ALLOWED otherwise.  Without CONFIG_USER_NS the result is
+ * always ZONE_ADMIN_NOT_APPLICABLE.
+ */
+zone_admin_result_t
+zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
+    const char *aux_dataset)
+{
+#ifdef CONFIG_USER_NS
+	zone_get_zoned_uid_fn_t lookup_fn;
+	struct user_namespace *user_ns;
+	char *delegation_root;
+	uid_t zoned_uid, ns_owner_uid;
+	int required_cap;
+	zone_admin_result_t result = ZONE_ADMIN_NOT_APPLICABLE;
+
+	/* Step 1: If in global zone, not applicable */
+	if (INGLOBALZONE(curproc))
+		return (ZONE_ADMIN_NOT_APPLICABLE);
+
+	/*
+	 * Step 2: Need callback to be registered.  Work from one copy of
+	 * the pointer so a concurrent unregister cannot turn a later call
+	 * in this function into a call through NULL.
+	 */
+	lookup_fn = zone_get_zoned_uid_fn;
+	if (lookup_fn == NULL)
+		return (ZONE_ADMIN_NOT_APPLICABLE);
+
+	delegation_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	/* Step 3: Find delegation root; UID 0 means nothing is delegated */
+	zoned_uid = lookup_fn(dataset, delegation_root, MAXPATHLEN);
+	if (zoned_uid == 0)
+		goto out;
+
+	/* Step 4: Verify namespace owner matches (compared as init-ns UIDs) */
+	user_ns = current_user_ns();
+	ns_owner_uid = from_kuid(&init_user_ns, user_ns->owner);
+	if (ns_owner_uid != zoned_uid)
+		goto out;
+
+	/* Step 5: Tiered capability check based on operation class */
+	switch (op) {
+	case ZONE_OP_CREATE:
+	case ZONE_OP_SNAPSHOT:
+	case ZONE_OP_SETPROP:
+		required_cap = CAP_FOWNER;
+		break;
+	case ZONE_OP_DESTROY:
+	case ZONE_OP_RENAME:
+	case ZONE_OP_CLONE:
+	default:
+		/* Unlisted operations get the strictest requirement. */
+		required_cap = CAP_SYS_ADMIN;
+		break;
+	}
+	if (!ns_capable(user_ns, required_cap)) {
+		result = ZONE_ADMIN_DENIED;
+		goto out;
+	}
+
+	/* Step 6: Operation-specific constraints */
+	switch (op) {
+	case ZONE_OP_DESTROY:
+		/* Cannot destroy the delegation root itself */
+		if (zone_dataset_is_zoned_uid_root(dataset, zoned_uid)) {
+			result = ZONE_ADMIN_DENIED;
+			goto out;
+		}
+		break;
+
+	case ZONE_OP_RENAME:
+		/* Cannot rename outside delegation subtree */
+		if (aux_dataset != NULL) {
+			char *dst_root;
+			uid_t dst_uid;
+			boolean_t same_root;
+
+			dst_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+			dst_uid = lookup_fn(aux_dataset, dst_root,
+			    MAXPATHLEN);
+			/*
+			 * dst_root is only inspected when the UIDs match,
+			 * i.e. when the callback reported a delegation.
+			 */
+			same_root = (dst_uid == zoned_uid &&
+			    strcmp(dst_root, delegation_root) == 0);
+			kmem_free(dst_root, MAXPATHLEN);
+			if (!same_root) {
+				result = ZONE_ADMIN_DENIED;
+				goto out;
+			}
+		}
+		break;
+
+	case ZONE_OP_CLONE:
+		/*
+		 * Clone source must be visible.  Only visibility matters,
+		 * so no write out-parameter is requested.
+		 */
+		if (aux_dataset != NULL &&
+		    !zone_dataset_visible(aux_dataset, NULL)) {
+			result = ZONE_ADMIN_DENIED;
+			goto out;
+		}
+		break;
+
+	case ZONE_OP_CREATE:
+	case ZONE_OP_SNAPSHOT:
+	case ZONE_OP_SETPROP:
+		/* No additional constraints */
+		break;
+
+	default:
+		/*
+		 * NOTE(review): operations not listed above pass with only
+		 * the CAP_SYS_ADMIN check; confirm that is intended.
+		 */
+		break;
+	}
+
+	result = ZONE_ADMIN_ALLOWED;
+out:
+	kmem_free(delegation_root, MAXPATHLEN);
+	return (result);
+#else
+	(void) dataset, (void) op, (void) aux_dataset;
+	return (ZONE_ADMIN_NOT_APPLICABLE);
+#endif
+}
+EXPORT_SYMBOL(zone_dataset_admin_check);
+
/*
* A dataset is visible if:
* - It is a parent of a namespace entry.
@@ -293,34 +584,19 @@ EXPORT_SYMBOL(zone_dataset_detach);
* The parent datasets of namespace entries are visible and
* read-only to provide a path back to the root of the pool.
*/
-int
-zone_dataset_visible(const char *dataset, int *write)
+/*
+ * Helper function to check if a dataset matches against a list of
+ * delegated datasets. Returns visibility and sets write permission.
+ */
+static int
+zone_dataset_check_list(struct list_head *datasets, const char *dataset,
+ size_t dsnamelen, int *write)
{
- zone_datasets_t *zds;
zone_dataset_t *zd;
- size_t dsnamelen, zd_len;
- int visible;
-
- /* Default to read-only, in case visible is returned. */
- if (write != NULL)
- *write = 0;
- if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
- return (0);
- if (INGLOBALZONE(curproc)) {
- if (write != NULL)
- *write = 1;
- return (1);
- }
+ size_t zd_len;
+ int visible = 0;
- mutex_enter(&zone_datasets_lock);
- zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
- if (zds == NULL) {
- mutex_exit(&zone_datasets_lock);
- return (0);
- }
-
- visible = 0;
- list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
+ list_for_each_entry(zd, datasets, zd_list) {
zd_len = strlen(zd->zd_dsname);
if (zd_len > dsnamelen) {
/*
@@ -352,7 +628,8 @@ zone_dataset_visible(const char *dataset, int *write)
* the namespace entry.
*/
visible = memcmp(zd->zd_dsname, dataset,
- zd_len) == 0 && dataset[zd_len] == '/';
+ zd_len) == 0 && (dataset[zd_len] == '/' ||
+ dataset[zd_len] == '@' || dataset[zd_len] == '#');
if (visible) {
if (write != NULL)
*write = 1;
@@ -361,9 +638,70 @@ zone_dataset_visible(const char *dataset, int *write)
}
}
- mutex_exit(&zone_datasets_lock);
return (visible);
}
+
+#if defined(CONFIG_USER_NS)
+/*
+ * UID-based visibility check for the current process.  The owner of the
+ * process's user namespace selects the UID entry; its dataset list is then
+ * matched by zone_dataset_check_list().  Returns 0 when that owner has no
+ * entry.  Must be called with zone_datasets_lock held.
+ */
+static int
+zone_dataset_visible_uid(const char *dataset, size_t dsnamelen, int *write)
+{
+	kuid_t ns_owner = curproc->cred->user_ns->owner;
+	zone_uid_datasets_t *uid_entry = zone_uid_datasets_lookup(ns_owner);
+
+	if (uid_entry == NULL)
+		return (0);
+	return (zone_dataset_check_list(&uid_entry->zuds_datasets, dataset,
+	    dsnamelen, write));
+}
+#endif
+
+/*
+ * Decide whether `dataset` is visible to the current process and, when
+ * `write` is non-NULL, report through it whether the dataset is writable.
+ *
+ * A name that fails zone_dataset_name_check() is never visible.  In the
+ * global zone everything is visible and writable.  Otherwise the
+ * namespace-specific delegations are consulted first and, if they give no
+ * match and CONFIG_USER_NS is enabled, the UID-based delegations second.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+	zone_datasets_t *ns_entry;
+	size_t namelen;
+	int visible = 0;
+
+	/* Read-only unless a later step says otherwise. */
+	if (write != NULL)
+		*write = 0;
+	if (zone_dataset_name_check(dataset, &namelen) != 0)
+		return (0);
+	if (INGLOBALZONE(curproc)) {
+		if (write != NULL)
+			*write = 1;
+		return (1);
+	}
+
+	mutex_enter(&zone_datasets_lock);
+
+	/* Namespace-specific zoning takes precedence. */
+	ns_entry = zone_datasets_lookup(crgetzoneid(curproc->cred));
+	if (ns_entry != NULL) {
+		visible = zone_dataset_check_list(&ns_entry->zds_datasets,
+		    dataset, namelen, write);
+	}
+
+#if defined(CONFIG_USER_NS)
+	/* Fall back to UID-based zoning only when nothing matched above. */
+	if (!visible)
+		visible = zone_dataset_visible_uid(dataset, namelen, write);
+#endif
+
+	mutex_exit(&zone_datasets_lock);
+	return (visible);
+}
EXPORT_SYMBOL(zone_dataset_visible);
unsigned int
@@ -395,8 +733,9 @@ EXPORT_SYMBOL(crgetzoneid);
boolean_t
inglobalzone(proc_t *proc)
{
+ (void) proc;
#if defined(CONFIG_USER_NS)
- return (proc->cred->user_ns == &init_user_ns);
+ return (current_user_ns() == &init_user_ns);
#else
return (B_TRUE);
#endif
@@ -408,6 +747,7 @@ spl_zone_init(void)
{
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
INIT_LIST_HEAD(&zone_datasets);
+ INIT_LIST_HEAD(&zone_uid_datasets);
return (0);
}
@@ -415,6 +755,7 @@ void
spl_zone_fini(void)
{
zone_datasets_t *zds;
+ zone_uid_datasets_t *zuds;
zone_dataset_t *zd;
/*
@@ -423,6 +764,22 @@ spl_zone_fini(void)
* namespace is destroyed, just do it here, since spl is about to go
* out of context.
*/
+
+ /* Clean up UID-based delegations */
+ while (!list_empty(&zone_uid_datasets)) {
+ zuds = list_entry(zone_uid_datasets.next,
+ zone_uid_datasets_t, zuds_list);
+ while (!list_empty(&zuds->zuds_datasets)) {
+ zd = list_entry(zuds->zuds_datasets.next,
+ zone_dataset_t, zd_list);
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+ }
+ list_del(&zuds->zuds_list);
+ kmem_free(zuds, sizeof (*zuds));
+ }
+
+ /* Clean up namespace-based delegations */
while (!list_empty(&zone_datasets)) {
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
while (!list_empty(&zds->zds_datasets)) {