aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c863
1 files changed, 662 insertions, 201 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index 287de4ce7573..e9a956ce4bb4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/* Portions Copyright 2010 Robert Milkowski */
+
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -45,6 +46,7 @@
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
+#include <sys/sa.h>
#include <sys/varargs.h>
#include <sys/policy.h>
#include <sys/atomic.h>
@@ -55,17 +57,19 @@
#include <sys/dnlc.h>
#include <sys/dmu_objset.h>
#include <sys/spa_boot.h>
+#include <sys/sa.h>
+#include "zfs_comutil.h"
struct mtx zfs_debug_mtx;
MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
-int zfs_super_owner = 0;
+int zfs_super_owner;
SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
"File system owner can perform privileged operation on his file systems");
-int zfs_debug_level = 0;
+int zfs_debug_level;
TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level);
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0,
"Debug level");
@@ -74,12 +78,6 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
static int zfs_version_acl = ZFS_ACL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
"ZFS_ACL_VERSION");
-static int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION;
-SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD,
- &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION");
-static int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION;
-SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD,
- &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION");
static int zfs_version_spa = SPA_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
"SPA_VERSION");
@@ -156,9 +154,8 @@ zfs_sync(vfs_t *vfsp, int waitfor)
}
if (zfsvfs->z_log != NULL)
- zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
- else
- txg_wait_synced(dp, 0);
+ zil_commit(zfsvfs->z_log, 0);
+
ZFS_EXIT(zfsvfs);
} else {
/*
@@ -172,6 +169,60 @@ zfs_sync(vfs_t *vfsp, int waitfor)
return (0);
}
+#ifndef __FreeBSD__
+static int
+zfs_create_unique_device(dev_t *dev)
+{
+ major_t new_major;
+
+ do {
+ ASSERT3U(zfs_minor, <=, MAXMIN32);
+ minor_t start = zfs_minor;
+ do {
+ mutex_enter(&zfs_dev_mtx);
+ if (zfs_minor >= MAXMIN32) {
+ /*
+ * If we're still using the real major
+ * keep out of /dev/zfs and /dev/zvol minor
+ * number space. If we're using a getudev()'ed
+ * major number, we can use all of its minors.
+ */
+ if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
+ zfs_minor = ZFS_MIN_MINOR;
+ else
+ zfs_minor = 0;
+ } else {
+ zfs_minor++;
+ }
+ *dev = makedevice(zfs_major, zfs_minor);
+ mutex_exit(&zfs_dev_mtx);
+ } while (vfs_devismounted(*dev) && zfs_minor != start);
+ if (zfs_minor == start) {
+ /*
+ * We are using all ~262,000 minor numbers for the
+ * current major number. Create a new major number.
+ */
+ if ((new_major = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN,
+ "zfs_mount: Can't get unique major "
+ "device number.");
+ return (-1);
+ }
+ mutex_enter(&zfs_dev_mtx);
+ zfs_major = new_major;
+ zfs_minor = 0;
+
+ mutex_exit(&zfs_dev_mtx);
+ } else {
+ break;
+ }
+ /* CONSTANTCONDITION */
+ } while (1);
+
+ return (0);
+}
+#endif /* !__FreeBSD__ */
+
static void
atime_changed_cb(void *arg, uint64_t newval)
{
@@ -313,14 +364,6 @@ vscan_changed_cb(void *arg, uint64_t newval)
}
static void
-acl_mode_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_acl_mode = newval;
-}
-
-static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
zfsvfs_t *zfsvfs = arg;
@@ -335,11 +378,11 @@ zfs_register_callbacks(vfs_t *vfsp)
objset_t *os = NULL;
zfsvfs_t *zfsvfs = NULL;
uint64_t nbmand;
- int readonly, do_readonly = FALSE;
- int setuid, do_setuid = FALSE;
- int exec, do_exec = FALSE;
- int xattr, do_xattr = FALSE;
- int atime, do_atime = FALSE;
+ int readonly, do_readonly = B_FALSE;
+ int setuid, do_setuid = B_FALSE;
+ int exec, do_exec = B_FALSE;
+ int xattr, do_xattr = B_FALSE;
+ int atime, do_atime = B_FALSE;
int error = 0;
ASSERT(vfsp);
@@ -360,7 +403,8 @@ zfs_register_callbacks(vfs_t *vfsp)
* of mount options, we stash away the current values and
* restore them after we register the callbacks.
*/
- if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
+ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
+ !spa_writeable(dmu_objset_spa(os))) {
readonly = B_TRUE;
do_readonly = B_TRUE;
} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
@@ -444,8 +488,6 @@ zfs_register_callbacks(vfs_t *vfsp)
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "aclmode", acl_mode_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zfsvfs);
@@ -483,7 +525,6 @@ unregister:
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
@@ -491,62 +532,53 @@ unregister:
}
-static void
-uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
- int64_t delta, dmu_tx_t *tx)
-{
- uint64_t used = 0;
- char buf[32];
- int err;
- uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
-
- if (delta == 0)
- return;
-
- (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
- err = zap_lookup(os, obj, buf, 8, 1, &used);
- ASSERT(err == 0 || err == ENOENT);
- /* no underflow/overflow */
- ASSERT(delta > 0 || used >= -delta);
- ASSERT(delta < 0 || used + delta > used);
- used += delta;
- if (used == 0)
- err = zap_remove(os, obj, buf, tx);
- else
- err = zap_update(os, obj, buf, 8, 1, &used, tx);
- ASSERT(err == 0);
-}
-
-static void
-zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
- void *oldbonus, void *newbonus,
- uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
+static int
+zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
+ uint64_t *userp, uint64_t *groupp)
{
- znode_phys_t *oldznp = oldbonus;
- znode_phys_t *newznp = newbonus;
+ znode_phys_t *znp = data;
+ int error = 0;
- if (bonustype != DMU_OT_ZNODE)
- return;
+ /*
+ * Is it a valid type of object to track?
+ */
+ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
+ return (ENOENT);
- /* We charge 512 for the dnode (if it's allocated). */
- if (oldznp->zp_gen != 0)
- oldused += DNODE_SIZE;
- if (newznp->zp_gen != 0)
- newused += DNODE_SIZE;
+ /*
+ * If we have a NULL data pointer
+ * then assume the id's aren't changing and
+ * return EEXIST to the dmu to let it know to
+ * use the same ids
+ */
+ if (data == NULL)
+ return (EEXIST);
- if (oldznp->zp_uid == newznp->zp_uid) {
- uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
+ if (bonustype == DMU_OT_ZNODE) {
+ *userp = znp->zp_uid;
+ *groupp = znp->zp_gid;
} else {
- uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
- uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
- }
+ int hdrsize;
- if (oldznp->zp_gid == newznp->zp_gid) {
- uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
- } else {
- uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
- uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
+ ASSERT(bonustype == DMU_OT_SA);
+ hdrsize = sa_hdrsize(data);
+
+ if (hdrsize != 0) {
+ *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_UID_OFFSET));
+ *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_GID_OFFSET));
+ } else {
+ /*
+ * This should only happen for newly created
+ * files that haven't had the znode data filled
+ * in yet.
+ */
+ *userp = 0;
+ *groupp = 0;
+ }
}
+ return (error);
}
static void
@@ -733,7 +765,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
}
boolean_t
-zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
+zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
{
char buf[32];
uint64_t used, quota, usedobj, quotaobj;
@@ -756,33 +788,48 @@ zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
return (used >= quota);
}
+boolean_t
+zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
+{
+ uint64_t fuid;
+ uint64_t quotaobj;
+
+ quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
+
+ fuid = isgroup ? zp->z_gid : zp->z_uid;
+
+ if (quotaobj == 0 || zfsvfs->z_replay)
+ return (B_FALSE);
+
+ return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
+}
+
int
-zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
+zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
{
objset_t *os;
zfsvfs_t *zfsvfs;
uint64_t zval;
int i, error;
+ uint64_t sa_obj;
- if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
- return (error);
- if (zval)
- mode |= DS_MODE_READONLY;
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
- if (error == EROFS) {
- mode |= DS_MODE_READONLY;
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
- }
- if (error)
+ /*
+ * We claim to always be readonly so we can open snapshots;
+ * other ZPL code will prevent us from writing to snapshots.
+ */
+ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+ if (error) {
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
+ }
/*
* Initialize the zfs-specific filesystem structure.
* Should probably make this a kmem cache, shuffle fields,
* and just bzero up to z_hold_mtx[].
*/
- zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
zfsvfs->z_vfs = NULL;
zfsvfs->z_parent = zfsvfs;
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
@@ -792,15 +839,15 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
if (error) {
goto out;
- } else if (zfsvfs->z_version > ZPL_VERSION) {
- (void) printf("Mismatched versions: File system "
- "is version %llu on-disk format, which is "
- "incompatible with this software version %lld!",
- (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
+ } else if (zfsvfs->z_version >
+ zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
+ (void) printf("Can't mount a version %lld file system "
+ "on a version %lld pool\n. Pool must be upgraded to mount "
+ "this file system.", (u_longlong_t)zfsvfs->z_version,
+ (u_longlong_t)spa_version(dmu_objset_spa(os)));
error = ENOTSUP;
goto out;
}
-
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
goto out;
zfsvfs->z_norm = (int)zval;
@@ -822,6 +869,29 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+
+ if (zfsvfs->z_use_sa) {
+ /* should either have both of these objects or none */
+ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+ &sa_obj);
+ if (error)
+ return (error);
+ } else {
+ /*
+ * Pre SA versions file systems should never touch
+ * either the attribute registration or layout objects.
+ */
+ sa_obj = 0;
+ }
+
+ error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+ &zfsvfs->z_attr_table);
+ if (error)
+ goto out;
+
+ if (zfsvfs->z_version >= ZPL_VERSION_SA)
+ sa_register_update_callback(os, zfs_sa_upgrade);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
&zfsvfs->z_root);
@@ -857,7 +927,6 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
goto out;
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
@@ -867,12 +936,12 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
- *zvp = zfsvfs;
+ *zfvp = zfsvfs;
return (0);
out:
- dmu_objset_close(os);
- *zvp = NULL;
+ dmu_objset_disown(os, zfsvfs);
+ *zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
}
@@ -889,15 +958,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/*
* Set the objset user_ptr to track its zfsvfs.
*/
- mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
- mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
- if (zil_disable) {
- zil_destroy(zfsvfs->z_log, B_FALSE);
- zfsvfs->z_log = NULL;
- }
/*
* If we are not mounting (ie: online recv), then we don't
@@ -917,37 +982,42 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
else
zfs_unlinked_drain(zfsvfs);
- if (zfsvfs->z_log) {
- /*
- * Parse and replay the intent log.
- *
- * Because of ziltest, this must be done after
- * zfs_unlinked_drain(). (Further note: ziltest
- * doesn't use readonly mounts, where
- * zfs_unlinked_drain() isn't called.) This is because
- * ziltest causes spa_sync() to think it's committed,
- * but actually it is not, so the intent log contains
- * many txg's worth of changes.
- *
- * In particular, if object N is in the unlinked set in
- * the last txg to actually sync, then it could be
- * actually freed in a later txg and then reallocated
- * in a yet later txg. This would write a "create
- * object N" record to the intent log. Normally, this
- * would be fine because the spa_sync() would have
- * written out the fact that object N is free, before
- * we could write the "create object N" intent log
- * record.
- *
- * But when we are in ziltest mode, we advance the "open
- * txg" without actually spa_sync()-ing the changes to
- * disk. So we would see that object N is still
- * allocated and in the unlinked set, and there is an
- * intent log record saying to allocate it.
- */
- zfsvfs->z_replay = B_TRUE;
- zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
- zfsvfs->z_replay = B_FALSE;
+ /*
+ * Parse and replay the intent log.
+ *
+ * Because of ziltest, this must be done after
+ * zfs_unlinked_drain(). (Further note: ziltest
+ * doesn't use readonly mounts, where
+ * zfs_unlinked_drain() isn't called.) This is because
+ * ziltest causes spa_sync() to think it's committed,
+ * but actually it is not, so the intent log contains
+ * many txg's worth of changes.
+ *
+ * In particular, if object N is in the unlinked set in
+ * the last txg to actually sync, then it could be
+ * actually freed in a later txg and then reallocated
+ * in a yet later txg. This would write a "create
+ * object N" record to the intent log. Normally, this
+ * would be fine because the spa_sync() would have
+ * written out the fact that object N is free, before
+ * we could write the "create object N" intent log
+ * record.
+ *
+ * But when we are in ziltest mode, we advance the "open
+ * txg" without actually spa_sync()-ing the changes to
+ * disk. So we would see that object N is still
+ * allocated and in the unlinked set, and there is an
+ * intent log record saying to allocate it.
+ */
+ if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
+ if (zil_replay_disable) {
+ zil_destroy(zfsvfs->z_log, B_FALSE);
+ } else {
+ zfsvfs->z_replay = B_TRUE;
+ zil_replay(zfsvfs->z_os, zfsvfs,
+ zfs_replay_vector);
+ zfsvfs->z_replay = B_FALSE;
+ }
}
zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
}
@@ -974,7 +1044,6 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
zfs_fuid_destroy(zfsvfs);
mutex_destroy(&zfsvfs->z_znodes_lock);
- mutex_destroy(&zfsvfs->z_online_recv_lock);
mutex_destroy(&zfsvfs->z_lock);
list_destroy(&zfsvfs->z_all_znodes);
rrw_destroy(&zfsvfs->z_teardown_lock);
@@ -989,13 +1058,24 @@ static void
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
{
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
- if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) {
- vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
- vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
- vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
- vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
- vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
+ if (zfsvfs->z_vfs) {
+ if (zfsvfs->z_use_fuids) {
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
+ } else {
+ vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
+ vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
+ vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
+ vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
+ vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
+ vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
+ }
}
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}
static int
@@ -1009,7 +1089,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
ASSERT(vfsp);
ASSERT(osname);
- error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
+ error = zfsvfs_create(osname, &zfsvfs);
if (error)
return (error);
zfsvfs->z_vfs = vfsp;
@@ -1026,7 +1106,6 @@ zfs_domount(vfs_t *vfsp, char *osname)
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
-
/*
* The fsid is 64 bits, composed of an 8-bit fs type, which
* separates our fsid from any other filesystem types, and a
@@ -1053,6 +1132,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
}
+ vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
uint64_t pval;
@@ -1063,10 +1143,11 @@ zfs_domount(vfs_t *vfsp, char *osname)
goto out;
xattr_changed_cb(zfsvfs, pval);
zfsvfs->z_issnap = B_TRUE;
+ zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
- mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
- mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
} else {
error = zfsvfs_setup(zfsvfs, B_TRUE);
}
@@ -1080,7 +1161,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
zfsctl_create(zfsvfs);
out:
if (error) {
- dmu_objset_close(zfsvfs->z_os);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
zfsvfs_free(zfsvfs);
} else {
atomic_add_32(&zfs_active_fs_count, 1);
@@ -1121,9 +1202,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
zfsvfs) == 0);
- VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
- zfsvfs) == 0);
-
VERIFY(dsl_prop_unregister(ds, "aclinherit",
acl_inherit_changed_cb, zfsvfs) == 0);
@@ -1132,6 +1210,302 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
}
}
+#ifdef SECLABEL
+/*
+ * Convert a decimal digit string to a uint64_t integer.
+ */
+static int
+str_to_uint64(char *str, uint64_t *objnum)
+{
+ uint64_t num = 0;
+
+ while (*str) {
+ if (*str < '0' || *str > '9')
+ return (EINVAL);
+
+ num = num*10 + *str++ - '0';
+ }
+
+ *objnum = num;
+ return (0);
+}
+
+/*
+ * The boot path passed from the boot loader is in the form of
+ * "rootpool-name/root-filesystem-object-number'. Convert this
+ * string to a dataset name: "rootpool-name/root-filesystem-name".
+ */
+static int
+zfs_parse_bootfs(char *bpath, char *outpath)
+{
+ char *slashp;
+ uint64_t objnum;
+ int error;
+
+ if (*bpath == 0 || *bpath == '/')
+ return (EINVAL);
+
+ (void) strcpy(outpath, bpath);
+
+ slashp = strchr(bpath, '/');
+
+ /* if no '/', just return the pool name */
+ if (slashp == NULL) {
+ return (0);
+ }
+
+ /* if not a number, just return the root dataset name */
+ if (str_to_uint64(slashp+1, &objnum)) {
+ return (0);
+ }
+
+ *slashp = '\0';
+ error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
+ *slashp = '/';
+
+ return (error);
+}
+
+/*
+ * zfs_check_global_label:
+ * Check that the hex label string is appropriate for the dataset
+ * being mounted into the global_zone proper.
+ *
+ * Return an error if the hex label string is not default or
+ * admin_low/admin_high. For admin_low labels, the corresponding
+ * dataset must be readonly.
+ */
+int
+zfs_check_global_label(const char *dsname, const char *hexsl)
+{
+ if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
+ return (0);
+ if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
+ return (0);
+ if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
+ /* must be readonly */
+ uint64_t rdonly;
+
+ if (dsl_prop_get_integer(dsname,
+ zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
+ return (EACCES);
+ return (rdonly ? 0 : EACCES);
+ }
+ return (EACCES);
+}
+
+/*
+ * zfs_mount_label_policy:
+ * Determine whether the mount is allowed according to MAC check.
+ * by comparing (where appropriate) label of the dataset against
+ * the label of the zone being mounted into. If the dataset has
+ * no label, create one.
+ *
+ * Returns:
+ * 0 : access allowed
+ * >0 : error code, such as EACCES
+ */
+static int
+zfs_mount_label_policy(vfs_t *vfsp, char *osname)
+{
+ int error, retv;
+ zone_t *mntzone = NULL;
+ ts_label_t *mnt_tsl;
+ bslabel_t *mnt_sl;
+ bslabel_t ds_sl;
+ char ds_hexsl[MAXNAMELEN];
+
+ retv = EACCES; /* assume the worst */
+
+ /*
+ * Start by getting the dataset label if it exists.
+ */
+ error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
+ 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
+ if (error)
+ return (EACCES);
+
+ /*
+ * If labeling is NOT enabled, then disallow the mount of datasets
+ * which have a non-default label already. No other label checks
+ * are needed.
+ */
+ if (!is_system_labeled()) {
+ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
+ return (0);
+ return (EACCES);
+ }
+
+ /*
+ * Get the label of the mountpoint. If mounting into the global
+ * zone (i.e. mountpoint is not within an active zone and the
+ * zoned property is off), the label must be default or
+ * admin_low/admin_high only; no other checks are needed.
+ */
+ mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
+ if (mntzone->zone_id == GLOBAL_ZONEID) {
+ uint64_t zoned;
+
+ zone_rele(mntzone);
+
+ if (dsl_prop_get_integer(osname,
+ zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
+ return (EACCES);
+ if (!zoned)
+ return (zfs_check_global_label(osname, ds_hexsl));
+ else
+ /*
+ * This is the case of a zone dataset being mounted
+ * initially, before the zone has been fully created;
+ * allow this mount into global zone.
+ */
+ return (0);
+ }
+
+ mnt_tsl = mntzone->zone_slabel;
+ ASSERT(mnt_tsl != NULL);
+ label_hold(mnt_tsl);
+ mnt_sl = label2bslabel(mnt_tsl);
+
+ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
+ /*
+ * The dataset doesn't have a real label, so fabricate one.
+ */
+ char *str = NULL;
+
+ if (l_to_str_internal(mnt_sl, &str) == 0 &&
+ dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
+ ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
+ retv = 0;
+ if (str != NULL)
+ kmem_free(str, strlen(str) + 1);
+ } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
+ /*
+ * Now compare labels to complete the MAC check. If the
+ * labels are equal then allow access. If the mountpoint
+ * label dominates the dataset label, allow readonly access.
+ * Otherwise, access is denied.
+ */
+ if (blequal(mnt_sl, &ds_sl))
+ retv = 0;
+ else if (bldominates(mnt_sl, &ds_sl)) {
+ vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
+ retv = 0;
+ }
+ }
+
+ label_rele(mnt_tsl);
+ zone_rele(mntzone);
+ return (retv);
+}
+#endif /* SECLABEL */
+
+#ifdef OPENSOLARIS_MOUNTROOT
+static int
+zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
+{
+ int error = 0;
+ static int zfsrootdone = 0;
+ zfsvfs_t *zfsvfs = NULL;
+ znode_t *zp = NULL;
+ vnode_t *vp = NULL;
+ char *zfs_bootfs;
+ char *zfs_devid;
+
+ ASSERT(vfsp);
+
+ /*
+ * The filesystem that we mount as root is defined in the
+ * boot property "zfs-bootfs" with a format of
+ * "poolname/root-dataset-objnum".
+ */
+ if (why == ROOT_INIT) {
+ if (zfsrootdone++)
+ return (EBUSY);
+ /*
+ * the process of doing a spa_load will require the
+ * clock to be set before we could (for example) do
+ * something better by looking at the timestamp on
+ * an uberblock, so just set it to -1.
+ */
+ clkset(-1);
+
+ if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
+ cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
+ "bootfs name");
+ return (EINVAL);
+ }
+ zfs_devid = spa_get_bootprop("diskdevid");
+ error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
+ if (zfs_devid)
+ spa_free_bootprop(zfs_devid);
+ if (error) {
+ spa_free_bootprop(zfs_bootfs);
+ cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
+ error);
+ return (error);
+ }
+ if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
+ spa_free_bootprop(zfs_bootfs);
+ cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
+ error);
+ return (error);
+ }
+
+ spa_free_bootprop(zfs_bootfs);
+
+ if (error = vfs_lock(vfsp))
+ return (error);
+
+ if (error = zfs_domount(vfsp, rootfs.bo_name)) {
+ cmn_err(CE_NOTE, "zfs_domount: error %d", error);
+ goto out;
+ }
+
+ zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
+ ASSERT(zfsvfs);
+ if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
+ cmn_err(CE_NOTE, "zfs_zget: error %d", error);
+ goto out;
+ }
+
+ vp = ZTOV(zp);
+ mutex_enter(&vp->v_lock);
+ vp->v_flag |= VROOT;
+ mutex_exit(&vp->v_lock);
+ rootvp = vp;
+
+ /*
+ * Leave rootvp held. The root file system is never unmounted.
+ */
+
+ vfs_add((struct vnode *)0, vfsp,
+ (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
+out:
+ vfs_unlock(vfsp);
+ return (error);
+ } else if (why == ROOT_REMOUNT) {
+ readonly_changed_cb(vfsp->vfs_data, B_FALSE);
+ vfsp->vfs_flag |= VFS_REMOUNT;
+
+ /* refresh mount options */
+ zfs_unregister_callbacks(vfsp->vfs_data);
+ return (zfs_register_callbacks(vfsp));
+
+ } else if (why == ROOT_UNMOUNT) {
+ zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
+ (void) zfs_sync(vfsp, 0, 0);
+ return (0);
+ }
+
+ /*
+ * if "why" is equal to anything else other than ROOT_INIT,
+ * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
+ */
+ return (ENOTSUP);
+}
+#endif /* OPENSOLARIS_MOUNTROOT */
+
/*ARGSUSED*/
static int
zfs_mount(vfs_t *vfsp)
@@ -1203,6 +1577,12 @@ zfs_mount(vfs_t *vfsp)
goto out;
}
+#ifdef SECLABEL
+ error = zfs_mount_label_policy(vfsp, osname);
+ if (error)
+ goto out;
+#endif
+
vfsp->vfs_flag |= MNT_NFS4ACLS;
/*
@@ -1291,6 +1671,25 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp)
return (0);
}
+int
+zfs_vnode_lock(vnode_t *vp, int flags)
+{
+ int error;
+
+ ASSERT(vp != NULL);
+
+ /*
+ * Check if the file system wasn't forcibly unmounted in the meantime.
+ */
+ error = vn_lock(vp, flags);
+ if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) {
+ VOP_UNLOCK(vp, 0);
+ error = ENOENT;
+ }
+
+ return (error);
+}
+
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
@@ -1301,14 +1700,18 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
ZFS_ENTER_NOERROR(zfsvfs);
error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
+ if (error == 0)
+ *vpp = ZTOV(rootzp);
ZFS_EXIT(zfsvfs);
if (error == 0) {
- *vpp = ZTOV(rootzp);
- error = vn_lock(*vpp, flags);
- (*vpp)->v_vflag |= VV_ROOT;
+ error = zfs_vnode_lock(*vpp, flags);
+ if (error == 0)
+ (*vpp)->v_vflag |= VV_ROOT;
}
+ if (error != 0)
+ *vpp = NULL;
return (error);
}
@@ -1371,7 +1774,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
mutex_enter(&zfsvfs->z_znodes_lock);
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
zp = list_next(&zfsvfs->z_all_znodes, zp))
- if (zp->z_dbuf) {
+ if (zp->z_sa_hdl) {
ASSERT(ZTOV(zp)->v_count >= 0);
zfs_znode_dmu_fini(zp);
}
@@ -1416,10 +1819,10 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
/*
* Evict cached data
*/
- if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
- txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
- (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
- }
+ if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os))
+ if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
+ txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
+ (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);
}
@@ -1440,6 +1843,7 @@ zfs_umount(vfs_t *vfsp, int fflag)
ZFS_DELEG_PERM_MOUNT, cr))
return (ret);
}
+
/*
* We purge the parent filesystem's vfsp as the parent filesystem
* and all of its snapshots have their vnode's v_vfsp set to the
@@ -1525,14 +1929,14 @@ zfs_umount(vfs_t *vfsp, int fflag)
/*
* Unset the objset user_ptr.
*/
- mutex_enter(&os->os->os_user_ptr_lock);
+ mutex_enter(&os->os_user_ptr_lock);
dmu_objset_set_user(os, NULL);
- mutex_exit(&os->os->os_user_ptr_lock);
+ mutex_exit(&os->os_user_ptr_lock);
/*
* Finally release the objset
*/
- dmu_objset_close(os);
+ dmu_objset_disown(os, zfsvfs);
}
/*
@@ -1572,13 +1976,13 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
VN_RELE(ZTOV(zp));
err = EINVAL;
}
+ if (err == 0)
+ *vpp = ZTOV(zp);
ZFS_EXIT(zfsvfs);
+ if (err == 0)
+ err = zfs_vnode_lock(*vpp, flags);
if (err != 0)
*vpp = NULL;
- else {
- *vpp = ZTOV(zp);
- vn_lock(*vpp, flags);
- }
return (err);
}
@@ -1611,7 +2015,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
uint64_t fid_gen = 0;
uint64_t gen_mask;
uint64_t zp_gen;
- int i, err;
+ int i, err;
*vpp = NULL;
@@ -1665,8 +2069,10 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
VN_HOLD(*vpp);
}
ZFS_EXIT(zfsvfs);
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- return (0);
+ err = zfs_vnode_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ if (err != 0)
+ *vpp = NULL;
+ return (err);
}
gen_mask = -1ULL >> (64 - 8 * i);
@@ -1676,7 +2082,9 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
ZFS_EXIT(zfsvfs);
return (err);
}
- zp_gen = zp->z_phys->zp_gen & gen_mask;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+ sizeof (uint64_t));
+ zp_gen = zp_gen & gen_mask;
if (zp_gen == 0)
zp_gen = 1;
if (zp->z_unlinked || zp_gen != fid_gen) {
@@ -1686,12 +2094,14 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
return (EINVAL);
}
- ZFS_EXIT(zfsvfs);
-
*vpp = ZTOV(zp);
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread);
- return (0);
+ ZFS_EXIT(zfsvfs);
+ err = zfs_vnode_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ if (err == 0)
+ vnode_create_vobject(*vpp, zp->z_size, curthread);
+ else
+ *vpp = NULL;
+ return (err);
}
/*
@@ -1701,17 +2111,13 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
* 'z_teardown_inactive_lock' write held.
*/
int
-zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
int error;
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
return (error);
-
- *modep = zfsvfs->z_os->os_mode;
- if (name)
- dmu_objset_name(zfsvfs->z_os, name);
- dmu_objset_close(zfsvfs->z_os);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
return (0);
}
@@ -1720,21 +2126,49 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
* Reopen zfsvfs_t::z_os and release VOPs.
*/
int
-zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
+zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
int err;
ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
- err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
+ err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
+ &zfsvfs->z_os);
if (err) {
zfsvfs->z_os = NULL;
} else {
znode_t *zp;
+ uint64_t sa_obj = 0;
+
+ /*
+ * Make sure version hasn't changed
+ */
+
+ err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
+ &zfsvfs->z_version);
+
+ if (err)
+ goto bail;
+
+ err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
+ ZFS_SA_ATTRS, 8, 1, &sa_obj);
+
+ if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
+ goto bail;
+
+ if ((err = sa_setup(zfsvfs->z_os, sa_obj,
+ zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0)
+ goto bail;
+
+ if (zfsvfs->z_version >= ZPL_VERSION_SA)
+ sa_register_update_callback(zfsvfs->z_os,
+ zfs_sa_upgrade);
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
+ zfs_set_fuid_feature(zfsvfs);
+
/*
* Attempt to re-establish all the active znodes with
* their dbufs. If a zfs_rezget() fails, then we'll let
@@ -1747,17 +2181,17 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
(void) zfs_rezget(zp);
}
mutex_exit(&zfsvfs->z_znodes_lock);
-
}
+bail:
/* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) {
/*
- * Since we couldn't reopen zfsvfs::z_os, force
- * unmount this file system.
+ * Since we couldn't reopen zfsvfs::z_os, or
+ * setup the sa framework force unmount this file system.
*/
if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
@@ -1773,9 +2207,11 @@ zfs_freevfs(vfs_t *vfsp)
#ifdef sun
/*
* If this is a snapshot, we have an extra VFS_HOLD on our parent
- * from zfs_mount(). Release it here.
+ * from zfs_mount(). Release it here. If we came through
+ * zfs_mountroot() instead, we didn't grab an extra hold, so
+ * skip the VFS_RELE for rootvfs.
*/
- if (zfsvfs->z_issnap)
+ if (zfsvfs->z_issnap && (vfsp != rootvfs))
VFS_RELE(zfsvfs->z_parent->z_vfs);
#endif /* sun */
@@ -1825,17 +2261,17 @@ zfs_init(void)
printf("ZFS filesystem version " ZPL_VERSION_STRING "\n");
/*
- * Initialize znode cache, vnode ops, etc...
+ * Initialize .zfs directory structures
*/
- zfs_znode_init();
+ zfsctl_init();
/*
- * Initialize .zfs directory structures
+ * Initialize znode cache, vnode ops, etc...
*/
- zfsctl_init();
+ zfs_znode_init();
/*
- * Reduce number of vnode. Originally number of vnodes is calculated
+ * Reduce number of vnodes. Originally number of vnodes is calculated
* with UFS inode in mind. We reduce it here, because it's too big for
* ZFS/i386.
*/
@@ -1871,13 +2307,23 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
if (newvers < zfsvfs->z_version)
return (EINVAL);
+ if (zfs_spa_version_map(newvers) >
+ spa_version(dmu_objset_spa(zfsvfs->z_os)))
+ return (ENOTSUP);
+
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+ ZFS_SA_ATTRS);
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+ }
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
}
+
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
8, 1, &newvers, tx);
@@ -1886,20 +2332,35 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
return (error);
}
- spa_history_internal_log(LOG_DS_UPGRADE,
- dmu_objset_spa(os), tx, CRED(),
- "oldver=%llu newver=%llu dataset = %llu",
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+ uint64_t sa_obj;
+
+ ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
+ SPA_VERSION_SA);
+ sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+ DMU_OT_NONE, 0, tx);
+
+ error = zap_add(os, MASTER_NODE_OBJ,
+ ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+ ASSERT3U(error, ==, 0);
+
+ VERIFY(0 == sa_set_sa_object(os, sa_obj));
+ sa_register_update_callback(os, zfs_sa_upgrade);
+ }
+
+ spa_history_log_internal(LOG_DS_UPGRADE,
+ dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
zfsvfs->z_version, newvers, dmu_objset_id(os));
dmu_tx_commit(tx);
zfsvfs->z_version = newvers;
- if (zfsvfs->z_version >= ZPL_VERSION_FUID)
- zfs_set_fuid_feature(zfsvfs);
+ zfs_set_fuid_feature(zfsvfs);
return (0);
}
+
/*
* Read a property stored within the master node.
*/