diff options
Diffstat (limited to 'sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c')
-rw-r--r-- | sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c | 551 |
1 files changed, 385 insertions, 166 deletions
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 42e11eeb183d..a972c720dfdb 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -62,8 +62,8 @@ #include <sys/sunddi.h> #include <sys/dmu_objset.h> #include <sys/dsl_dir.h> -#include <sys/spa_boot.h> #include <sys/jail.h> +#include <sys/osd.h> #include <ufs/ufs/quota.h> #include <sys/zfs_quota.h> @@ -76,7 +76,6 @@ #define MNTK_NOMSYNC 8 #endif -/* BEGIN CSTYLED */ struct mtx zfs_debug_mtx; MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); @@ -84,23 +83,36 @@ SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); int zfs_super_owner; SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, - "File system owner can perform privileged operation on his file systems"); + "File system owners can perform privileged operation on file systems"); int zfs_debug_level; SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, "Debug level"); +struct zfs_jailparam { + int mount_snapshot; +}; + +static struct zfs_jailparam zfs_jailparam0 = { + .mount_snapshot = 0, +}; + +static int zfs_jailparam_slot; + +SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters"); +SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I", + "Allow mounting snapshots in the .zfs directory for unjailed datasets"); + SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); static int zfs_version_acl = ZFS_ACL_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, - "ZFS_ACL_VERSION"); + "ZFS_ACL_VERSION"); static int zfs_version_spa = SPA_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, - "SPA_VERSION"); + "SPA_VERSION"); static int zfs_version_zpl = ZPL_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, - "ZPL_VERSION"); -/* END CSTYLED */ + "ZPL_VERSION"); #if __FreeBSD_version >= 1400018 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, @@ -141,7 +153,12 @@ struct vfsops zfs_vfsops = { .vfs_quotactl = zfs_quotactl, }; -VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); +#ifdef VFCF_CROSS_COPY_FILE_RANGE +VFS_SET(zfs_vfsops, zfs, + VFCF_DELEGADMIN | VFCF_JAIL | VFCF_CROSS_COPY_FILE_RANGE); +#else +VFS_SET(zfs_vfsops, zfs, VFCF_DELEGADMIN | VFCF_JAIL); +#endif /* * We need to keep a count of active fs's. @@ -218,7 +235,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, vfs_unbusy(vfsp); if (tmp != *val) { - (void) strcpy(setpoint, "temporary"); + if (setpoint) + (void) strcpy(setpoint, "temporary"); *val = tmp; } return (0); @@ -289,7 +307,8 @@ zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) cmd = cmds >> SUBCMDSHIFT; type = cmds & SUBCMDMASK; - ZFS_ENTER(zfsvfs); + if ((error = zfs_enter(zfsvfs, FTAG)) != 0) + return (error); if (id == -1) { switch (type) { case USRQUOTA: @@ -388,7 +407,7 @@ zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) break; } done: - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); return (error); } @@ -399,7 +418,6 @@ zfs_is_readonly(zfsvfs_t *zfsvfs) return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); } -/*ARGSUSED*/ static int zfs_sync(vfs_t *vfsp, int waitfor) { @@ -426,11 +444,8 @@ zfs_sync(vfs_t *vfsp, int waitfor) dsl_pool_t *dp; int error; - error = vfs_stdsync(vfsp, waitfor); - if (error != 0) + if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); - - ZFS_ENTER(zfsvfs); dp = dmu_objset_pool(zfsvfs->z_os); /* @@ -438,14 +453,14 @@ zfs_sync(vfs_t *vfsp, int waitfor) * filesystems which may exist on a suspended pool. */ if (rebooting && spa_suspended(dp->dp_spa)) { - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); return (0); } if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, 0); - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); } else { /* * Sync all ZFS filesystems. This is what happens when you @@ -712,7 +727,7 @@ zfs_register_callbacks(vfs_t *vfsp) nbmand = B_FALSE; } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { nbmand = B_TRUE; - } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) { + } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) { dsl_pool_config_exit(dmu_objset_pool(os), FTAG); return (error); } @@ -1030,8 +1045,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) if (error) return (error); - zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); - /* * If we are not mounting (ie: online recv), then we don't * have to worry about replaying the log as we blocked all @@ -1041,7 +1054,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) boolean_t readonly; ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); - dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); + error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); + if (error) + return (error); + zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, + &zfsvfs->z_kstat.dk_zil_sums); /* * During replay we remove the read only flag to @@ -1112,6 +1129,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) /* restore readonly bit */ if (readonly != 0) zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; + } else { + ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL); + zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, + &zfsvfs->z_kstat.dk_zil_sums); } /* @@ -1133,7 +1154,6 @@ zfsvfs_free(zfsvfs_t *zfsvfs) mutex_destroy(&zfsvfs->z_znodes_lock); mutex_destroy(&zfsvfs->z_lock); - ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); list_destroy(&zfsvfs->z_all_znodes); ZFS_TEARDOWN_DESTROY(zfsvfs); ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); @@ -1148,23 +1168,6 @@ static void zfs_set_fuid_feature(zfsvfs_t *zfsvfs) { zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); - if (zfsvfs->z_vfs) { - if (zfsvfs->z_use_fuids) { - vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); - } else { - vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); - vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); - vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); - vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); - vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); - vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); - } - } zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); } @@ -1223,15 +1226,6 @@ zfs_domount(vfs_t *vfsp, char *osname) * Set features for file system. */ zfs_set_fuid_feature(zfsvfs); - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); - vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); - vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); - } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { - vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); - vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); - } - vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; @@ -1294,8 +1288,7 @@ getpoolname(const char *osname, char *poolname) } else { if (p - osname >= MAXNAMELEN) return (ENAMETOOLONG); - (void) strncpy(poolname, osname, p - osname); - poolname[p - osname] = '\0'; + (void) strlcpy(poolname, osname, p - osname + 1); } return (0); } @@ -1312,7 +1305,6 @@ fetch_osname_options(char *name, bool *checkpointrewind) } } -/*ARGSUSED*/ static int zfs_mount(vfs_t *vfsp) { @@ -1322,7 +1314,7 @@ zfs_mount(vfs_t *vfsp) char *osname; int error = 0; int canwrite; - bool checkpointrewind; + bool checkpointrewind, isctlsnap = false; if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) return (SET_ERROR(EINVAL)); @@ -1337,6 +1329,8 @@ zfs_mount(vfs_t *vfsp) } fetch_osname_options(osname, &checkpointrewind); + isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) && + strchr(osname, '@') != NULL); /* * Check for mount privilege? @@ -1345,7 +1339,9 @@ zfs_mount(vfs_t *vfsp) * we have local permission to allow it */ error = secpolicy_fs_mount(cr, mvp, vfsp); - if (error) { + if (error && isctlsnap) { + secpolicy_fs_mount_clearopts(cr, vfsp); + } else if (error) { if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) goto out; @@ -1382,8 +1378,27 @@ zfs_mount(vfs_t *vfsp) */ if (!INGLOBALZONE(curproc) && (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { - error = SET_ERROR(EPERM); - goto out; + boolean_t mount_snapshot = B_FALSE; + + /* + * Snapshots may be mounted in .zfs for unjailed datasets + * if allowed by the jail param zfs.mount_snapshot. + */ + if (isctlsnap) { + struct prison *pr; + struct zfs_jailparam *zjp; + + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + zjp = osd_jail_get(pr, zfs_jailparam_slot); + mtx_unlock(&pr->pr_mtx); + if (zjp && zjp->mount_snapshot) + mount_snapshot = B_TRUE; + } + if (!mount_snapshot) { + error = SET_ERROR(EPERM); + goto out; + } } vfsp->vfs_flag |= MNT_NFS4ACLS; @@ -1433,10 +1448,12 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; uint64_t refdbytes, availbytes, usedobjs, availobjs; + int error; statp->f_version = STATFS_VERSION; - ZFS_ENTER(zfsvfs); + if ((error = zfs_enter(zfsvfs, FTAG)) != 0) + return (error); dmu_objset_space(zfsvfs->z_os, &refdbytes, &availbytes, &usedobjs, &availobjs); @@ -1483,7 +1500,7 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp) statp->f_namemax = MAXNAMELEN - 1; - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); return (0); } @@ -1494,13 +1511,14 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) znode_t *rootzp; int error; - ZFS_ENTER(zfsvfs); + if ((error = zfs_enter(zfsvfs, FTAG)) != 0) + return (error); error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); if (error == 0) *vpp = ZTOV(rootzp); - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); if (error == 0) { error = vn_lock(*vpp, flags); @@ -1539,12 +1557,11 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * may add the parents of dir-based xattrs to the taskq * so we want to wait for these. * - * We can safely read z_nr_znodes without locking because the - * VFS has already blocked operations which add to the - * z_all_znodes list and thus increment z_nr_znodes. + * We can safely check z_all_znodes for being empty because the + * VFS has already blocked operations which add to it. */ int round = 0; - while (zfsvfs->z_nr_znodes > 0) { + while (!list_is_empty(&zfsvfs->z_all_znodes)) { taskq_wait_outstanding(dsl_pool_zrele_taskq( dmu_objset_pool(zfsvfs->z_os)), 0); if (++round > 1 && !unmounting) @@ -1643,7 +1660,6 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) return (0); } -/*ARGSUSED*/ static int zfs_umount(vfs_t *vfsp, int fflag) { @@ -1738,7 +1754,8 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) return (EOPNOTSUPP); - ZFS_ENTER(zfsvfs); + if ((err = zfs_enter(zfsvfs, FTAG)) != 0) + return (err); err = zfs_zget(zfsvfs, ino, &zp); if (err == 0 && zp->z_unlinked) { vrele(ZTOV(zp)); @@ -1746,7 +1763,7 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) } if (err == 0) *vpp = ZTOV(zp); - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); if (err == 0) { err = vn_lock(*vpp, flags); if (err != 0) @@ -1779,8 +1796,10 @@ zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, credanonp, numsecflavors, secflavors)); } -CTASSERT(SHORT_FID_LEN <= sizeof (struct fid)); -CTASSERT(LONG_FID_LEN <= sizeof (struct fid)); +_Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, + "struct fid bigger than SHORT_FID_LEN"); +_Static_assert(sizeof (struct fid) >= LONG_FID_LEN, + "struct fid bigger than LONG_FID_LEN"); static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) @@ -1791,13 +1810,15 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) vnode_t *dvp; uint64_t object = 0; uint64_t fid_gen = 0; + uint64_t setgen = 0; uint64_t gen_mask; uint64_t zp_gen; int i, err; *vpp = NULL; - ZFS_ENTER(zfsvfs); + if ((err = zfs_enter(zfsvfs, FTAG)) != 0) + return (err); /* * On FreeBSD we can get snapshot's mount point or its parent file @@ -1806,7 +1827,6 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { zfid_long_t *zlfid = (zfid_long_t *)fidp; uint64_t objsetid = 0; - uint64_t setgen = 0; for (i = 0; i < sizeof (zlfid->zf_setid); i++) objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); @@ -1814,12 +1834,13 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) for (i = 0; i < sizeof (zlfid->zf_setgen); i++) setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); if (err) return (SET_ERROR(EINVAL)); - ZFS_ENTER(zfsvfs); + if ((err = zfs_enter(zfsvfs, FTAG)) != 0) + return (err); } if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { @@ -1831,7 +1852,14 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) for (i = 0; i < sizeof (zfid->zf_gen); i++) fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); } else { - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); + return (SET_ERROR(EINVAL)); + } + + if (fidp->fid_len == LONG_FID_LEN && setgen != 0) { + zfs_exit(zfsvfs, FTAG); + dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", + (u_longlong_t)fid_gen, (u_longlong_t)setgen); return (SET_ERROR(EINVAL)); } @@ -1843,7 +1871,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) if ((fid_gen == 0 && (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); if (object == ZFSCTL_INO_SNAPDIR) { cn.cn_nameptr = "snapshot"; @@ -1878,7 +1906,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) (u_longlong_t)fid_gen, (u_longlong_t)gen_mask); if ((err = zfs_zget(zfsvfs, object, &zp))) { - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); return (err); } (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, @@ -1890,12 +1918,12 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) dprintf("znode gen (%llu) != fid gen (%llu)\n", (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); vrele(ZTOV(zp)); - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EINVAL)); } *vpp = ZTOV(zp); - ZFS_EXIT(zfsvfs); + zfs_exit(zfsvfs, FTAG); err = vn_lock(*vpp, flags); if (err == 0) vnode_create_vobject(*vpp, zp->z_size, curthread); @@ -1963,7 +1991,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) /* * Attempt to re-establish all the active znodes with * their dbufs. If a zfs_rezget() fails, then we'll let - * any potential callers discover that via ZFS_ENTER_VERIFY_VP + * any potential callers discover that via zfs_enter_verify_zp * when they try to use their znode. */ mutex_enter(&zfsvfs->z_znodes_lock); @@ -2042,6 +2070,26 @@ zfs_vnodes_adjust_back(void) #endif } +#if __FreeBSD_version >= 1300139 +static struct sx zfs_vnlru_lock; +static struct vnode *zfs_vnlru_marker; +#endif +static arc_prune_t *zfs_prune; + +static void +zfs_prune_task(uint64_t nr_to_scan, void *arg __unused) +{ + if (nr_to_scan > INT_MAX) + nr_to_scan = INT_MAX; +#if __FreeBSD_version >= 1300139 + sx_xlock(&zfs_vnlru_lock); + vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker); + sx_xunlock(&zfs_vnlru_lock); +#else + vnlru_free(nr_to_scan, &zfs_vfsops); +#endif +} + void zfs_init(void) { @@ -2068,11 +2116,23 @@ zfs_init(void) dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); + +#if __FreeBSD_version >= 1300139 + zfs_vnlru_marker = vnlru_alloc_marker(); + sx_init(&zfs_vnlru_lock, "zfs vnlru lock"); +#endif + zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL); } void zfs_fini(void) { + arc_remove_prune_callback(zfs_prune); +#if __FreeBSD_version >= 1300139 + vnlru_free_marker(zfs_vnlru_marker); + sx_destroy(&zfs_vnlru_lock); +#endif + taskq_destroy(zfsvfs_taskq); zfsctl_fini(); zfs_znode_fini(); @@ -2187,92 +2247,6 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) } /* - * Read a property stored within the master node. - */ -int -zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) -{ - uint64_t *cached_copy = NULL; - - /* - * Figure out where in the objset_t the cached copy would live, if it - * is available for the requested property. - */ - if (os != NULL) { - switch (prop) { - case ZFS_PROP_VERSION: - cached_copy = &os->os_version; - break; - case ZFS_PROP_NORMALIZE: - cached_copy = &os->os_normalization; - break; - case ZFS_PROP_UTF8ONLY: - cached_copy = &os->os_utf8only; - break; - case ZFS_PROP_CASE: - cached_copy = &os->os_casesensitivity; - break; - default: - break; - } - } - if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { - *value = *cached_copy; - return (0); - } - - /* - * If the property wasn't cached, look up the file system's value for - * the property. For the version property, we look up a slightly - * different string. - */ - const char *pname; - int error = ENOENT; - if (prop == ZFS_PROP_VERSION) { - pname = ZPL_VERSION_STR; - } else { - pname = zfs_prop_to_name(prop); - } - - if (os != NULL) { - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); - } - - if (error == ENOENT) { - /* No value set, use the default value */ - switch (prop) { - case ZFS_PROP_VERSION: - *value = ZPL_VERSION; - break; - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - *value = 0; - break; - case ZFS_PROP_CASE: - *value = ZFS_CASE_SENSITIVE; - break; - case ZFS_PROP_ACLTYPE: - *value = ZFS_ACLTYPE_NFSV4; - break; - default: - return (error); - } - error = 0; - } - - /* - * If one of the methods for getting the property value above worked, - * copy it into the objset_t's cache. - */ - if (error == 0 && cached_copy != NULL) { - *cached_copy = *value; - } - - return (error); -} - -/* * Return true if the corresponding vfs's unmounted flag is set. * Otherwise return false. * If this function returns true we know VFS unmount has been initiated. @@ -2326,3 +2300,248 @@ zfsvfs_update_fromname(const char *oldname, const char *newname) mtx_unlock(&mountlist_mtx); } #endif + +/* + * Find a prison with ZFS info. + * Return the ZFS info and the (locked) prison. + */ +static struct zfs_jailparam * +zfs_jailparam_find(struct prison *spr, struct prison **prp) +{ + struct prison *pr; + struct zfs_jailparam *zjp; + + for (pr = spr; ; pr = pr->pr_parent) { + mtx_lock(&pr->pr_mtx); + if (pr == &prison0) { + zjp = &zfs_jailparam0; + break; + } + zjp = osd_jail_get(pr, zfs_jailparam_slot); + if (zjp != NULL) + break; + mtx_unlock(&pr->pr_mtx); + } + *prp = pr; + + return (zjp); +} + +/* + * Ensure a prison has its own ZFS info. If zjpp is non-null, point it to the + * ZFS info and lock the prison. + */ +static void +zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp) +{ + struct prison *ppr; + struct zfs_jailparam *zjp, *nzjp; + void **rsv; + + /* If this prison already has ZFS info, return that. */ + zjp = zfs_jailparam_find(pr, &ppr); + if (ppr == pr) + goto done; + + /* + * Allocate a new info record. Then check again, in case something + * changed during the allocation. + */ + mtx_unlock(&ppr->pr_mtx); + nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK); + rsv = osd_reserve(zfs_jailparam_slot); + zjp = zfs_jailparam_find(pr, &ppr); + if (ppr == pr) { + free(nzjp, M_PRISON); + osd_free_reserved(rsv); + goto done; + } + /* Inherit the initial values from the ancestor. */ + mtx_lock(&pr->pr_mtx); + (void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp); + (void) memcpy(nzjp, zjp, sizeof (*zjp)); + zjp = nzjp; + mtx_unlock(&ppr->pr_mtx); +done: + if (zjpp != NULL) + *zjpp = zjp; + else + mtx_unlock(&pr->pr_mtx); +} + +/* + * Jail OSD methods for ZFS VFS info. + */ +static int +zfs_jailparam_create(void *obj, void *data) +{ + struct prison *pr = obj; + struct vfsoptlist *opts = data; + int jsys; + + if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 && + jsys == JAIL_SYS_INHERIT) + return (0); + /* + * Inherit a prison's initial values from its parent + * (different from JAIL_SYS_INHERIT which also inherits changes). + */ + zfs_jailparam_alloc(pr, NULL); + return (0); +} + +static int +zfs_jailparam_get(void *obj, void *data) +{ + struct prison *ppr, *pr = obj; + struct vfsoptlist *opts = data; + struct zfs_jailparam *zjp; + int jsys, error; + + zjp = zfs_jailparam_find(pr, &ppr); + jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; + error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys)); + if (error != 0 && error != ENOENT) + goto done; + if (jsys == JAIL_SYS_NEW) { + error = vfs_setopt(opts, "zfs.mount_snapshot", + &zjp->mount_snapshot, sizeof (zjp->mount_snapshot)); + if (error != 0 && error != ENOENT) + goto done; + } else { + /* + * If this prison is inheriting its ZFS info, report + * empty/zero parameters. + */ + static int mount_snapshot = 0; + + error = vfs_setopt(opts, "zfs.mount_snapshot", + &mount_snapshot, sizeof (mount_snapshot)); + if (error != 0 && error != ENOENT) + goto done; + } + error = 0; +done: + mtx_unlock(&ppr->pr_mtx); + return (error); +} + +static int +zfs_jailparam_set(void *obj, void *data) +{ + struct prison *pr = obj; + struct prison *ppr; + struct vfsoptlist *opts = data; + int error, jsys, mount_snapshot; + + /* Set the parameters, which should be correct. */ + error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); + if (error == ENOENT) + jsys = -1; + error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, + sizeof (mount_snapshot)); + if (error == ENOENT) + mount_snapshot = -1; + else + jsys = JAIL_SYS_NEW; + switch (jsys) { + case JAIL_SYS_NEW: + { + /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */ + struct zfs_jailparam *zjp; + + /* + * A child jail cannot have more permissions than its parent + */ + if (pr->pr_parent != &prison0) { + zjp = zfs_jailparam_find(pr->pr_parent, &ppr); + mtx_unlock(&ppr->pr_mtx); + if (zjp->mount_snapshot < mount_snapshot) { + return (EPERM); + } + } + zfs_jailparam_alloc(pr, &zjp); + if (mount_snapshot != -1) + zjp->mount_snapshot = mount_snapshot; + mtx_unlock(&pr->pr_mtx); + break; + } + case JAIL_SYS_INHERIT: + /* "zfs=inherit": inherit the parent's ZFS info. */ + mtx_lock(&pr->pr_mtx); + osd_jail_del(pr, zfs_jailparam_slot); + mtx_unlock(&pr->pr_mtx); + break; + case -1: + /* + * If the setting being changed is not ZFS related + * then do nothing. + */ + break; + } + + return (0); +} + +static int +zfs_jailparam_check(void *obj __unused, void *data) +{ + struct vfsoptlist *opts = data; + int error, jsys, mount_snapshot; + + /* Check that the parameters are correct. */ + error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); + if (error != ENOENT) { + if (error != 0) + return (error); + if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) + return (EINVAL); + } + error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, + sizeof (mount_snapshot)); + if (error != ENOENT) { + if (error != 0) + return (error); + if (mount_snapshot != 0 && mount_snapshot != 1) + return (EINVAL); + } + return (0); +} + +static void +zfs_jailparam_destroy(void *data) +{ + + free(data, M_PRISON); +} + +static void +zfs_jailparam_sysinit(void *arg __unused) +{ + struct prison *pr; + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_CREATE] = zfs_jailparam_create, + [PR_METHOD_GET] = zfs_jailparam_get, + [PR_METHOD_SET] = zfs_jailparam_set, + [PR_METHOD_CHECK] = zfs_jailparam_check, + }; + + zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods); + /* Copy the defaults to any existing prisons. */ + sx_slock(&allprison_lock); + TAILQ_FOREACH(pr, &allprison, pr_list) + zfs_jailparam_alloc(pr, NULL); + sx_sunlock(&allprison_lock); +} + +static void +zfs_jailparam_sysuninit(void *arg __unused) +{ + + osd_jail_deregister(zfs_jailparam_slot); +} + +SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, + zfs_jailparam_sysinit, NULL); +SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, + zfs_jailparam_sysuninit, NULL); |