-rw-r--r--  UPDATING | 23
-rw-r--r--  cddl/compat/opensolaris/include/mnttab.h | 10
-rw-r--r--  cddl/compat/opensolaris/misc/mnttab.c | 160
-rw-r--r--  cddl/contrib/opensolaris/cmd/pyzfs/pyzfs.py | 79
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb.8 | 21
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb.c | 251
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb_il.c | 18
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 854
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 610
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_main.c | 88
-rw-r--r--  cddl/contrib/opensolaris/cmd/ztest/ztest.c | 674
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h | 49
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c | 30
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c | 1691
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c | 11
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h | 4
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c | 53
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c | 116
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c | 28
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c | 33
-rw-r--r--  cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h | 3
-rw-r--r--  cddl/contrib/opensolaris/lib/libzpool/common/taskq.c | 15
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py | 28
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/allow.py | 394
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py | 205
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py | 29
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c | 610
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py | 28
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py | 277
-rw-r--r--  cddl/contrib/opensolaris/lib/pyzfs/common/util.py | 138
-rw-r--r--  sys/cddl/boot/zfs/zfsimpl.h | 36
-rw-r--r--  sys/cddl/compat/opensolaris/kern/opensolaris_policy.c | 10
-rw-r--r--  sys/cddl/compat/opensolaris/kern/opensolaris_uio.c | 112
-rw-r--r--  sys/cddl/compat/opensolaris/sys/misc.h | 3
-rw-r--r--  sys/cddl/compat/opensolaris/sys/policy.h | 1
-rw-r--r--  sys/cddl/compat/opensolaris/sys/sid.h | 7
-rw-r--r--  sys/cddl/compat/opensolaris/sys/uio.h | 5
-rw-r--r--  sys/cddl/compat/opensolaris/sys/vnode.h | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h | 8
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c | 20
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c | 36
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 146
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c | 102
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c | 99
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c | 308
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c | 58
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c | 179
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c | 28
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c | 22
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c | 98
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c | 39
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c | 60
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c | 69
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c | 143
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c | 261
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c | 931
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c | 20
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c | 11
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c | 13
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c | 66
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c | 220
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h | 30
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h | 24
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h | 11
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h | 8
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h | 7
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h | 19
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h | 28
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h | 16
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h | 29
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h | 28
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h | 28
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h | 7
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h | 24
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h | 10
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h | 46
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c | 534
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c | 37
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c | 34
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c | 52
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c | 42
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c | 111
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c | 7
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c | 55
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c | 76
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c | 295
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c | 233
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c | 67
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c | 56
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c | 240
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 1067
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c | 50
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c | 68
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c | 619
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c | 746
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c | 247
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c | 226
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c | 504
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c | 44
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/acl.h | 8
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/debug.h | 19
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h | 45
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/eventdefs.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h | 10
-rw-r--r--  sys/modules/zfs/Makefile | 1
129 files changed, 10791 insertions, 4788 deletions
diff --git a/UPDATING b/UPDATING
index fabcb5e5d3e9..956bdc8b4713 100644
--- a/UPDATING
+++ b/UPDATING
@@ -23,6 +23,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
ln -s aj /etc/malloc.conf.)
+20100713:
+ A new version of ZFS (version 15) has been merged to -HEAD.
+ This version uses a python library for the following subcommands:
+ zfs allow, zfs unallow, zfs groupspace, zfs userspace.
+ For full functionality of these commands the following port must
+ be installed: sysutils/py-zfs
+
20100429:
'vm_page's are now hashed by physical address to an array of mutexes.
Currently this is only used to serialize access to hold_count. Over
@@ -964,6 +971,22 @@ COMMON ITEMS:
path, and has the highest probability of being successful. Please try
this approach before reporting problems with a major version upgrade.
+ ZFS notes
+ ---------
+ When upgrading the boot ZFS pool to a new version, always follow
+ these two steps:
+
+ 1.) recompile and reinstall the ZFS boot loader and boot block
+ (this is part of "make buildworld" and "make installworld")
+
+ 2.) update the ZFS boot block on your boot drive
+
+ The following example updates the ZFS boot block on the first
+ partition (freebsd-boot) of a GPT partitioned drive ad0:
+ "gpart bootcode -p /boot/gptzfsboot -i 1 ad0"
+
+ Non-boot pools do not need these updates.
+
To build a kernel
-----------------
If you are updating from a prior version of FreeBSD (even one just
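The UPDATING entries above come down to two manual steps: install the Python support library for the delegated subcommands, and refresh the boot blocks when a boot pool is upgraded. One way to satisfy the port requirement from a standard ports tree (assuming the usual /usr/ports layout and the sysutils/py-zfs port named in the entry) is:

	# build and install the library used by zfs allow/unallow/userspace/groupspace
	cd /usr/ports/sysutils/py-zfs
	make install clean

The gpart bootcode command quoted in the ZFS notes is then only needed for pools you actually boot from.
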
diff --git a/cddl/compat/opensolaris/include/mnttab.h b/cddl/compat/opensolaris/include/mnttab.h
index abd2f9dcc70c..a18dd8d1893b 100644
--- a/cddl/compat/opensolaris/include/mnttab.h
+++ b/cddl/compat/opensolaris/include/mnttab.h
@@ -3,10 +3,13 @@
#ifndef _OPENSOLARIS_MNTTAB_H_
#define _OPENSOLARIS_MNTTAB_H_
+#include <sys/param.h>
+#include <sys/mount.h>
+
#include <stdio.h>
#include <paths.h>
-#define MNTTAB _PATH_DEVNULL
+#define MNTTAB _PATH_DEVZERO
#define MNT_LINE_MAX 1024
#define umount2(p, f) unmount(p, f)
@@ -17,7 +20,12 @@ struct mnttab {
char *mnt_fstype;
char *mnt_mntopts;
};
+#define extmnttab mnttab
int getmntany(FILE *fd, struct mnttab *mgetp, struct mnttab *mrefp);
+int getmntent(FILE *fp, struct mnttab *mp);
+char *hasmntopt(struct mnttab *mnt, char *opt);
+
+void statfs2mnttab(struct statfs *sfs, struct mnttab *mp);
#endif /* !_OPENSOLARIS_MNTTAB_H_ */
diff --git a/cddl/compat/opensolaris/misc/mnttab.c b/cddl/compat/opensolaris/misc/mnttab.c
index 8c1c2d6dba8c..8f56d90f6232 100644
--- a/cddl/compat/opensolaris/misc/mnttab.c
+++ b/cddl/compat/opensolaris/misc/mnttab.c
@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
#include <sys/mount.h>
#include <sys/mntent.h>
#include <sys/mnttab.h>
+
+#include <ctype.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -88,75 +91,126 @@ optadd(char *mntopts, size_t size, const char *opt)
strlcat(mntopts, opt, size);
}
-int
-getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp)
+void
+statfs2mnttab(struct statfs *sfs, struct mnttab *mp)
{
- static struct statfs *sfs = NULL;
static char mntopts[MNTMAXSTR];
- struct opt *o;
- long i, n, flags;
+ long flags;
- if (sfs != NULL) {
- free(sfs);
- sfs = NULL;
- }
mntopts[0] = '\0';
- n = getfsstat(NULL, 0, MNT_NOWAIT);
- if (n == -1)
- return (-1);
- n = sizeof(*sfs) * (n + 8);
- sfs = malloc(n);
- if (sfs == NULL)
- return (-1);
- n = getfsstat(sfs, n, MNT_WAIT);
- if (n == -1) {
- free(sfs);
- sfs = NULL;
- return (-1);
+ flags = sfs->f_flags;
+#define OPTADD(opt) optadd(mntopts, sizeof(mntopts), (opt))
+ if (flags & MNT_RDONLY)
+ OPTADD(MNTOPT_RO);
+ else
+ OPTADD(MNTOPT_RW);
+ if (flags & MNT_NOSUID)
+ OPTADD(MNTOPT_NOSUID);
+ else
+ OPTADD(MNTOPT_SETUID);
+ if (flags & MNT_UPDATE)
+ OPTADD(MNTOPT_REMOUNT);
+ if (flags & MNT_NOATIME)
+ OPTADD(MNTOPT_NOATIME);
+ else
+ OPTADD(MNTOPT_ATIME);
+ OPTADD(MNTOPT_NOXATTR);
+ if (flags & MNT_NOEXEC)
+ OPTADD(MNTOPT_NOEXEC);
+ else
+ OPTADD(MNTOPT_EXEC);
+#undef OPTADD
+ mp->mnt_special = sfs->f_mntfromname;
+ mp->mnt_mountp = sfs->f_mntonname;
+ mp->mnt_fstype = sfs->f_fstypename;
+ mp->mnt_mntopts = mntopts;
+}
+
+static struct statfs *gsfs = NULL;
+static int allfs = 0;
+
+static int
+statfs_init(void)
+{
+ struct statfs *sfs;
+ int error;
+
+ if (gsfs != NULL) {
+ free(gsfs);
+ gsfs = NULL;
}
- for (i = 0; i < n; i++) {
+ allfs = getfsstat(NULL, 0, MNT_WAIT);
+ if (allfs == -1)
+ goto fail;
+ gsfs = malloc(sizeof(gsfs[0]) * allfs * 2);
+ if (gsfs == NULL)
+ goto fail;
+ allfs = getfsstat(gsfs, (long)(sizeof(gsfs[0]) * allfs * 2),
+ MNT_WAIT);
+ if (allfs == -1)
+ goto fail;
+ sfs = realloc(gsfs, allfs * sizeof(gsfs[0]));
+ if (sfs != NULL)
+ gsfs = sfs;
+ return (0);
+fail:
+ error = errno;
+ if (gsfs != NULL)
+ free(gsfs);
+ gsfs = NULL;
+ allfs = 0;
+ return (error);
+}
+
+int
+getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp)
+{
+ struct statfs *sfs;
+ int i, error;
+
+ error = statfs_init();
+ if (error != 0)
+ return (error);
+
+ for (i = 0; i < allfs; i++) {
if (mrefp->mnt_special != NULL &&
- strcmp(mrefp->mnt_special, sfs[i].f_mntfromname) != 0) {
+ strcmp(mrefp->mnt_special, gsfs[i].f_mntfromname) != 0) {
continue;
}
if (mrefp->mnt_mountp != NULL &&
- strcmp(mrefp->mnt_mountp, sfs[i].f_mntonname) != 0) {
+ strcmp(mrefp->mnt_mountp, gsfs[i].f_mntonname) != 0) {
continue;
}
if (mrefp->mnt_fstype != NULL &&
- strcmp(mrefp->mnt_fstype, sfs[i].f_fstypename) != 0) {
+ strcmp(mrefp->mnt_fstype, gsfs[i].f_fstypename) != 0) {
continue;
}
- flags = sfs[i].f_flags;
-#define OPTADD(opt) optadd(mntopts, sizeof(mntopts), (opt))
- if (flags & MNT_RDONLY)
- OPTADD(MNTOPT_RO);
- else
- OPTADD(MNTOPT_RW);
- if (flags & MNT_NOSUID)
- OPTADD(MNTOPT_NOSUID);
- else
- OPTADD(MNTOPT_SETUID);
- if (flags & MNT_UPDATE)
- OPTADD(MNTOPT_REMOUNT);
- if (flags & MNT_NOATIME)
- OPTADD(MNTOPT_NOATIME);
- else
- OPTADD(MNTOPT_ATIME);
- OPTADD(MNTOPT_NOXATTR);
- if (flags & MNT_NOEXEC)
- OPTADD(MNTOPT_NOEXEC);
- else
- OPTADD(MNTOPT_EXEC);
-#undef OPTADD
- mgetp->mnt_special = sfs[i].f_mntfromname;
- mgetp->mnt_mountp = sfs[i].f_mntonname;
- mgetp->mnt_fstype = sfs[i].f_fstypename;
- mgetp->mnt_mntopts = mntopts;
+ statfs2mnttab(&gsfs[i], mgetp);
return (0);
}
- free(sfs);
- sfs = NULL;
return (-1);
}
+
+int
+getmntent(FILE *fp, struct mnttab *mp)
+{
+ struct statfs *sfs;
+ int error, nfs;
+
+ nfs = (int)lseek(fileno(fp), 0, SEEK_CUR);
+ if (nfs == -1)
+ return (errno);
+	/* If nfs is 0, we want to refresh our cache. */
+ if (nfs == 0 || gsfs == NULL) {
+ error = statfs_init();
+ if (error != 0)
+ return (error);
+ }
+ if (nfs >= allfs)
+ return (-1);
+ statfs2mnttab(&gsfs[nfs], mp);
+ if (lseek(fileno(fp), 1, SEEK_CUR) == -1)
+ return (errno);
+ return (0);
+}
diff --git a/cddl/contrib/opensolaris/cmd/pyzfs/pyzfs.py b/cddl/contrib/opensolaris/cmd/pyzfs/pyzfs.py
new file mode 100644
index 000000000000..3867d91ccde5
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/pyzfs/pyzfs.py
@@ -0,0 +1,79 @@
+#! /usr/bin/python2.4 -S
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Note, we want SIGINT (control-c) to exit the process quietly, to mimic
+# the standard behavior of C programs. The best we can do with pure
+# Python is to run with -S (to disable "import site"), and start our
+# program with a "try" statement. Hopefully nobody hits ^C before our
+# try statement is executed.
+
+try:
+ import site
+ import gettext
+ import zfs.util
+ import zfs.ioctl
+ import sys
+ import errno
+
+ """This is the main script for doing zfs subcommands. It doesn't know
+ what subcommands there are, it just looks for a module zfs.<subcommand>
+ that implements that subcommand."""
+
+ _ = gettext.translation("SUNW_OST_OSCMD", "/usr/lib/locale",
+ fallback=True).gettext
+
+ if len(sys.argv) < 2:
+ sys.exit(_("missing subcommand argument"))
+
+ zfs.ioctl.set_cmdstr(" ".join(["zfs"] + sys.argv[1:]))
+
+ try:
+ # import zfs.<subcommand>
+ # subfunc = zfs.<subcommand>.do_<subcommand>
+
+ subcmd = sys.argv[1]
+ __import__("zfs." + subcmd)
+ submod = getattr(zfs, subcmd)
+ subfunc = getattr(submod, "do_" + subcmd)
+ except (ImportError, AttributeError):
+ sys.exit(_("invalid subcommand"))
+
+ try:
+ subfunc()
+ except zfs.util.ZFSError, e:
+ print(e)
+ sys.exit(1)
+
+except IOError, e:
+ import errno
+ import sys
+
+ if e.errno == errno.EPIPE:
+ sys.exit(1)
+ raise
+except KeyboardInterrupt:
+ import sys
+
+ sys.exit(1)
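The script above is only a dispatcher: it imports zfs.<subcommand> and calls that module's do_<subcommand>() function, so the actual logic lives in the lib/pyzfs modules added elsewhere in this change. Once sysutils/py-zfs is installed, the delegated subcommands are driven through the normal zfs front end; the dataset and user names below are placeholders, not part of this commit:

	zfs userspace tank/home                      # per-user space accounting
	zfs groupspace tank/home                     # per-group space accounting
	zfs allow alice snapshot,mount tank/home     # delegate permissions
	zfs allow tank/home                          # print current delegations
	zfs unallow alice snapshot,mount tank/home   # revoke them again
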
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.8 b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
index c9d5aed95b33..f6018256b444 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.8
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
@@ -1,23 +1,8 @@
'\" te
-.\" CDDL HEADER START
-.\"
-.\" The contents of this file are subject to the terms of the
-.\" Common Development and Distribution License (the "License").
-.\" You may not use this file except in compliance with the License.
-.\"
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-.\" or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions
-.\" and limitations under the License.
-.\"
-.\" When distributing Covered Code, include this CDDL HEADER in each
-.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-.\" If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying
-.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.\"
-.\" CDDL HEADER END
.\" Copyright (c) 2004, Sun Microsystems, Inc. All Rights Reserved.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.TH zdb 1M "31 Oct 2005" "SunOS 5.11" "System Administration Commands"
.SH NAME
zdb \- ZFS debugger
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index f0b4ba45841c..7106beebdca9 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -87,8 +87,8 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage: %s [-udibcsv] [-U cachefile_path] "
- "[-S user:cksumalg] "
+ "Usage: %s [-udibcsvL] [-U cachefile_path] [-t txg]\n"
+ "\t [-S user:cksumalg] "
"dataset [object...]\n"
" %s -C [pool]\n"
" %s -l dev\n"
@@ -102,12 +102,16 @@ usage(void)
(void) fprintf(stderr, " -C cached pool configuration\n");
(void) fprintf(stderr, " -i intent logs\n");
(void) fprintf(stderr, " -b block statistics\n");
- (void) fprintf(stderr, " -c checksum all data blocks\n");
+ (void) fprintf(stderr, " -m metaslabs\n");
+ (void) fprintf(stderr, " -c checksum all metadata (twice for "
+ "all data) blocks\n");
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
(void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
"dump blkptr signatures\n");
(void) fprintf(stderr, " -v verbose (applies to all others)\n");
(void) fprintf(stderr, " -l dump label contents\n");
+ (void) fprintf(stderr, " -L disable leak tracking (do not "
+ "load spacemaps)\n");
(void) fprintf(stderr, " -U cachefile_path -- use alternate "
"cachefile\n");
(void) fprintf(stderr, " -R read and display block from a "
@@ -115,12 +119,19 @@ usage(void)
(void) fprintf(stderr, " -e Pool is exported/destroyed/"
"has altroot\n");
(void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
+ (void) fprintf(stderr, " -t <txg> highest txg to use when "
+ "searching for uberblocks\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
exit(1);
}
+/*
+ * Called for usage errors that are discovered after a call to spa_open(),
+ * dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
+ */
+
static void
fatal(const char *fmt, ...)
{
@@ -132,7 +143,7 @@ fatal(const char *fmt, ...)
va_end(ap);
(void) fprintf(stderr, "\n");
- abort();
+ exit(1);
}
static void
@@ -205,7 +216,7 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
size_t nvsize = *(uint64_t *)data;
char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
- VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
+ VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
@@ -431,7 +442,7 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
alloc = 0;
for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
VERIFY(0 == dmu_read(os, smo->smo_object, offset,
- sizeof (entry), &entry));
+ sizeof (entry), &entry, DMU_READ_PREFETCH));
if (SM_DEBUG_DECODE(entry)) {
(void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
(u_longlong_t)(offset / sizeof (entry)),
@@ -463,6 +474,21 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
}
static void
+dump_metaslab_stats(metaslab_t *msp)
+{
+ char maxbuf[5];
+ space_map_t *sm = &msp->ms_map;
+ avl_tree_t *t = sm->sm_pp_root;
+ int free_pct = sm->sm_space * 100 / sm->sm_size;
+
+ nicenum(space_map_maxsize(sm), maxbuf);
+
+ (void) printf("\t %20s %10lu %7s %6s %4s %4d%%\n",
+ "segments", avl_numnodes(t), "maxsize", maxbuf,
+ "freepct", free_pct);
+}
+
+static void
dump_metaslab(metaslab_t *msp)
{
char freebuf[5];
@@ -472,22 +498,28 @@ dump_metaslab(metaslab_t *msp)
nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
- if (dump_opt['d'] <= 5) {
- (void) printf("\t%10llx %10llu %5s\n",
- (u_longlong_t)msp->ms_map.sm_start,
- (u_longlong_t)smo->smo_object,
- freebuf);
- return;
- }
-
(void) printf(
- "\tvdev %llu offset %08llx spacemap %4llu free %5s\n",
+ "\tvdev %5llu offset %12llx spacemap %6llu free %5s\n",
(u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
(u_longlong_t)smo->smo_object, freebuf);
- ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
+ if (dump_opt['m'] > 1) {
+ mutex_enter(&msp->ms_lock);
+ VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops,
+ SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0);
+ dump_metaslab_stats(msp);
+ space_map_unload(&msp->ms_map);
+ mutex_exit(&msp->ms_lock);
+ }
+
+ if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
+ ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
+
+ mutex_enter(&msp->ms_lock);
+ dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
+ mutex_exit(&msp->ms_lock);
+ }
- dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
}
static void
@@ -502,14 +534,12 @@ dump_metaslabs(spa_t *spa)
for (c = 0; c < rvd->vdev_children; c++) {
vd = rvd->vdev_child[c];
- (void) printf("\n vdev %llu\n\n", (u_longlong_t)vd->vdev_id);
+ (void) printf("\t%-10s %-19s %-15s %-10s\n",
+ "vdev", "offset", "spacemap", "free");
+ (void) printf("\t%10s %19s %15s %10s\n",
+ "----------", "-------------------",
+ "---------------", "-------------");
- if (dump_opt['d'] <= 5) {
- (void) printf("\t%10s %10s %5s\n",
- "offset", "spacemap", "free");
- (void) printf("\t%10s %10s %5s\n",
- "------", "--------", "----");
- }
for (m = 0; m < vd->vdev_ms_count; m++)
dump_metaslab(vd->vdev_ms[m]);
(void) printf("\n");
@@ -517,44 +547,52 @@ dump_metaslabs(spa_t *spa)
}
static void
+dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ char *prefix = (void *)sm;
+
+ (void) printf("%s [%llu,%llu) length %llu\n",
+ prefix,
+ (u_longlong_t)start,
+ (u_longlong_t)(start + size),
+ (u_longlong_t)(size));
+}
+
+static void
dump_dtl(vdev_t *vd, int indent)
{
- avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
- space_seg_t *ss;
- vdev_t *pvd;
- int c;
+ spa_t *spa = vd->vdev_spa;
+ boolean_t required;
+ char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
+ char prefix[256];
+
+ spa_vdev_state_enter(spa);
+ required = vdev_dtl_required(vd);
+ (void) spa_vdev_state_exit(spa, NULL, 0);
if (indent == 0)
(void) printf("\nDirty time logs:\n\n");
- (void) printf("\t%*s%s\n", indent, "",
+ (void) printf("\t%*s%s [%s]\n", indent, "",
vd->vdev_path ? vd->vdev_path :
- vd->vdev_parent ? vd->vdev_ops->vdev_op_type :
- spa_name(vd->vdev_spa));
+ vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
+ required ? "DTL-required" : "DTL-expendable");
- for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
- /*
- * Everything in this DTL must appear in all parent DTL unions.
- */
- for (pvd = vd; pvd; pvd = pvd->vdev_parent)
- ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map,
- ss->ss_start, ss->ss_end - ss->ss_start));
- (void) printf("\t%*soutage [%llu,%llu] length %llu\n",
- indent, "",
- (u_longlong_t)ss->ss_start,
- (u_longlong_t)ss->ss_end - 1,
- (u_longlong_t)(ss->ss_end - ss->ss_start));
- }
-
- (void) printf("\n");
-
- if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
- dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl,
- &vd->vdev_dtl_map);
- (void) printf("\n");
+ for (int t = 0; t < DTL_TYPES; t++) {
+ space_map_t *sm = &vd->vdev_dtl[t];
+ if (sm->sm_space == 0)
+ continue;
+ (void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
+ indent + 2, "", name[t]);
+ mutex_enter(sm->sm_lock);
+ space_map_walk(sm, dump_dtl_seg, (void *)prefix);
+ mutex_exit(sm->sm_lock);
+ if (dump_opt['d'] > 5 && vd->vdev_children == 0)
+ dump_spacemap(spa->spa_meta_objset,
+ &vd->vdev_dtl_smo, sm);
}
- for (c = 0; c < vd->vdev_children; c++)
+ for (int c = 0; c < vd->vdev_children; c++)
dump_dtl(vd->vdev_child[c], indent + 4);
}
@@ -668,7 +706,8 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
break;
fill += cbp->blk_fill;
}
- ASSERT3U(fill, ==, bp->blk_fill);
+ if (!err)
+ ASSERT3U(fill, ==, bp->blk_fill);
(void) arc_buf_remove_ref(buf, &buf);
}
@@ -904,6 +943,7 @@ dump_uidgid(objset_t *os, znode_phys_t *zp)
/* first find the fuid object. It lives in the master node */
VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
8, 1, &fuid_obj) == 0);
+ zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
(void) zfs_fuid_table_load(os, fuid_obj,
&idx_tree, &domain_tree);
fuid_table_loaded = B_TRUE;
@@ -1007,6 +1047,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_packed_nvlist, /* FUID nvlist size */
dump_zap, /* DSL dataset next clones */
dump_zap, /* DSL scrub queue */
+ dump_zap, /* ZFS user/group used */
+ dump_zap, /* ZFS user/group quota */
};
static void
@@ -1070,6 +1112,14 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
if (verbosity >= 4) {
+ (void) printf("\tdnode flags: %s%s\n",
+ (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
+ "USED_BYTES " : "",
+ (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
+ "USERUSED_ACCOUNTED " : "");
+ (void) printf("\tdnode maxblkid: %llu\n",
+ (longlong_t)dn->dn_phys->dn_maxblkid);
+
object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
object_viewer[doi.doi_type](os, object, NULL, 0);
*print_header = 1;
@@ -1124,7 +1174,7 @@ dump_dir(objset_t *os)
uint64_t object, object_count;
uint64_t refdbytes, usedobjs, scratch;
char numbuf[8];
- char blkbuf[BP_SPRINTF_LEN];
+ char blkbuf[BP_SPRINTF_LEN + 20];
char osname[MAXNAMELEN];
char *type = "UNKNOWN";
int verbosity = dump_opt['d'];
@@ -1150,8 +1200,8 @@ dump_dir(objset_t *os)
nicenum(refdbytes, numbuf);
if (verbosity >= 4) {
- (void) strcpy(blkbuf, ", rootbp ");
- sprintf_blkptr(blkbuf + strlen(blkbuf),
+ (void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
+ (void) sprintf_blkptr(blkbuf + strlen(blkbuf),
BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
} else {
blkbuf[0] = '\0';
@@ -1186,7 +1236,12 @@ dump_dir(objset_t *os)
}
dump_object(os, 0, verbosity, &print_header);
- object_count = 1;
+ object_count = 0;
+ if (os->os->os_userused_dnode &&
+ os->os->os_userused_dnode->dn_type != 0) {
+ dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
+ dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
+ }
object = 0;
while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
@@ -1198,8 +1253,10 @@ dump_dir(objset_t *os)
(void) printf("\n");
- if (error != ESRCH)
- fatal("dmu_object_next() = %d", error);
+ if (error != ESRCH) {
+ (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
+ abort();
+ }
}
static void
@@ -1390,7 +1447,8 @@ static space_map_ops_t zdb_space_map_ops = {
zdb_space_map_unload,
NULL, /* alloc */
zdb_space_map_claim,
- NULL /* free */
+ NULL, /* free */
+ NULL /* maxsize */
};
static void
@@ -1489,8 +1547,9 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
}
}
- VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
- NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
+ if (!dump_opt['L'])
+ VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
+ NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
}
static int
@@ -1499,13 +1558,25 @@ zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
{
zdb_cb_t *zcb = arg;
char blkbuf[BP_SPRINTF_LEN];
+ dmu_object_type_t type;
+ boolean_t is_l0_metadata;
if (bp == NULL)
return (0);
- zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
+ type = BP_GET_TYPE(bp);
+
+ zdb_count_block(spa, zcb, bp, type);
- if (dump_opt['c'] || dump_opt['S']) {
+ /*
+ * if we do metadata-only checksumming there's no need to checksum
+ * indirect blocks here because it is done during traverse
+ */
+ is_l0_metadata = (BP_GET_LEVEL(bp) == 0 && type < DMU_OT_NUMTYPES &&
+ dmu_ot[type].ot_metadata);
+
+ if (dump_opt['c'] > 1 || dump_opt['S'] ||
+ (dump_opt['c'] && is_l0_metadata)) {
int ioerr, size;
void *data;
@@ -1517,7 +1588,7 @@ zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
free(data);
/* We expect io errors on intent log */
- if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
+ if (ioerr && type != DMU_OT_INTENT_LOG) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;
@@ -1565,9 +1636,12 @@ dump_block_stats(spa_t *spa)
int c, e;
if (!dump_opt['S']) {
- (void) printf("\nTraversing all blocks to %sverify"
- " nothing leaked ...\n",
- dump_opt['c'] ? "verify checksums and " : "");
+ (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
+ (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
+ (dump_opt['c'] == 1) ? "metadata " : "",
+ dump_opt['c'] ? "checksums " : "",
+ (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
+ !dump_opt['L'] ? "nothing leaked " : "");
}
/*
@@ -1578,7 +1652,8 @@ dump_block_stats(spa_t *spa)
* it's not part of any space map) is a double allocation,
* reference to a freed block, or an unclaimed log block.
*/
- zdb_leak_init(spa);
+ if (!dump_opt['L'])
+ zdb_leak_init(spa);
/*
* If there's a deferred-free bplist, process that first.
@@ -1620,7 +1695,8 @@ dump_block_stats(spa_t *spa)
/*
* Report any leaked segments.
*/
- zdb_leak_fini(spa);
+ if (!dump_opt['L'])
+ zdb_leak_fini(spa);
/*
* If we're interested in printing out the blkptr signatures,
@@ -1646,14 +1722,16 @@ dump_block_stats(spa_t *spa)
tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
if (tzb->zb_asize == alloc + logalloc) {
- (void) printf("\n\tNo leaks (block sum matches space"
- " maps exactly)\n");
+ if (!dump_opt['L'])
+ (void) printf("\n\tNo leaks (block sum matches space"
+ " maps exactly)\n");
} else {
(void) printf("block traversal size %llu != alloc %llu "
- "(leaked %lld)\n",
+ "(%s %lld)\n",
(u_longlong_t)tzb->zb_asize,
(u_longlong_t)alloc + logalloc,
- (u_longlong_t)(alloc + logalloc - tzb->zb_asize));
+ (dump_opt['L']) ? "unreachable" : "leaked",
+ (longlong_t)(alloc + logalloc - tzb->zb_asize));
leaks = 1;
}
@@ -1760,14 +1838,17 @@ dump_zpool(spa_t *spa)
if (dump_opt['u'])
dump_uberblock(&spa->spa_uberblock);
- if (dump_opt['d'] || dump_opt['i']) {
+ if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) {
dump_dir(dp->dp_meta_objset);
if (dump_opt['d'] >= 3) {
dump_bplist(dp->dp_meta_objset,
spa->spa_sync_bplist_obj, "Deferred frees");
dump_dtl(spa->spa_root_vdev, 0);
- dump_metaslabs(spa);
}
+
+ if (dump_opt['d'] >= 3 || dump_opt['m'])
+ dump_metaslabs(spa);
+
(void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
}
@@ -2243,13 +2324,14 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
+ while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) {
switch (c) {
case 'u':
case 'd':
case 'i':
case 'b':
case 'c':
+ case 'm':
case 's':
case 'C':
case 'l':
@@ -2257,6 +2339,9 @@ main(int argc, char **argv)
dump_opt[c]++;
dump_all = 0;
break;
+ case 'L':
+ dump_opt[c]++;
+ break;
case 'v':
verbose++;
break;
@@ -2287,6 +2372,14 @@ main(int argc, char **argv)
else
usage();
break;
+ case 't':
+ ub_max_txg = strtoull(optarg, NULL, 0);
+ if (ub_max_txg < TXG_INITIAL) {
+ (void) fprintf(stderr, "incorrect txg "
+ "specified: %s\n", optarg);
+ usage();
+ }
+ break;
default:
usage();
break;
@@ -2374,7 +2467,7 @@ main(int argc, char **argv)
}
if (error == 0)
- error = spa_import_faulted(argv[0],
+ error = spa_import_verbatim(argv[0],
exported_conf, nvl);
nvlist_free(nvl);
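Taken together, the zdb.c changes above add -m (metaslab dumps), -L (skip leak tracking, i.e. do not load space maps or claim blocks), and -t <txg> (cap the uberblock search), and change -c so that a single -c checksums only metadata while -cc checksums all data. Roughly how the new flags would be exercised, with "tank" standing in for a real pool name:

	zdb -m tank                        # metaslab summaries
	zdb -mm tank                       # also load each space map and print segment stats
	zdb -bcc tank                      # block traversal, checksumming all data
	zdb -bb -L tank                    # block statistics without leak tracking
	zdb -e -p /dev -t 1234567 tank     # exported pool, capping the uberblock txg search
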
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
index cc08ef514858..1b3c18fab1c2 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -115,7 +115,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
- if (verbose < 5)
+ if (txtype == TX_WRITE2 || verbose < 5)
return;
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
@@ -123,18 +123,19 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
print_log_bp(bp, "\t\t\t");
+ if (BP_IS_HOLE(bp)) {
+ (void) printf("\t\t\tLSIZE 0x%llx\n",
+ (u_longlong_t)BP_GET_LSIZE(bp));
+ }
if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
} else {
zbookmark_t zb;
- ASSERT3U(bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ==,
- dmu_objset_id(zilog->zl_os));
-
- zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
- zb.zb_object = 0;
- zb.zb_level = -1;
- zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
+ zb.zb_objset = dmu_objset_id(zilog->zl_os);
+ zb.zb_object = lr->lr_foid;
+ zb.zb_level = 0;
+ zb.zb_blkid = -1; /* unknown */
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
@@ -251,6 +252,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{ zil_prt_rec_create, "TX_MKDIR_ACL " },
{ zil_prt_rec_create, "TX_MKDIR_ATTR " },
{ zil_prt_rec_create, "TX_MKDIR_ACL_ATTR " },
+ { zil_prt_rec_write, "TX_WRITE2 " },
};
/* ARGSUSED */
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
index 9cda0e55643c..0d97026a4a43 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
@@ -1,9 +1,12 @@
'\" te
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
-.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
-.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH zfs 1M "14 Feb 2009" "SunOS 5.11" "System Administration Commands"
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
+.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
+.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH zfs 1M "5 May 2009" "SunOS 5.11" "System Administration Commands"
.SH NAME
zfs \- configures ZFS file systems
.SH SYNOPSIS
@@ -66,7 +69,7 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fB-t\fR \fItype\fR[,...]]
+\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-H\fR][\fB-o\fR \fIproperty\fR[,...]] [\fB-t\fR \fItype\fR[,...]]
[\fB-s\fR \fIproperty\fR] ... [\fB-S\fR \fIproperty\fR] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ...
.fi
@@ -77,8 +80,8 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBget\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIsource\fR[,...]] "\fIall\fR" | \fIproperty\fR[,...]
- \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
+\fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIsource\fR[,...]]
+ "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
.fi
.LP
@@ -98,6 +101,18 @@ zfs \- configures ZFS file systems
.LP
.nf
+\fBzfs\fR \fBuserspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR] ...
+ [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR|\fIsnapshot\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBgroupspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR] ...
+ [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR|\fIsnapshot\fR
+.fi
+
+.LP
+.nf
\fBzfs\fR \fBmount\fR
.fi
@@ -128,12 +143,17 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnF\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.fi
.LP
.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnF\fR] \fB-d\fR \fIfilesystem\fR
+\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] \fB-d\fR \fIfilesystem\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBallow\fR \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
@@ -192,7 +212,7 @@ pool/{filesystem,volume,snapshot}
.sp
.LP
-where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
+\&...where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
.sp
.LP
A dataset can be one of the following:
@@ -215,7 +235,7 @@ A \fBZFS\fR dataset of type "filesystem" that can be mounted within the standard
.ad
.sp .6
.RS 4n
-A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. Volumes cannot be used in a non-global zone.
+A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments.
.RE
.sp
@@ -268,88 +288,88 @@ Creating a \fBZFS\fR file system is a simple operation, so the number of file sy
By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed.
.sp
.LP
-A file system can also have a mount point set in the "mountpoint" property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the "\fBzfs mount -a\fR" command is invoked (without editing \fB/etc/vfstab\fR). The mountpoint property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR.
+A file system can also have a mount point set in the \fBmountpoint\fR property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the \fBzfs mount -a\fR command is invoked (without editing \fB/etc/vfstab\fR). The \fBmountpoint\fR property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR.
.sp
.LP
-A file system mountpoint property of "none" prevents the file system from being mounted.
+A file system \fBmountpoint\fR property of \fBnone\fR prevents the file system from being mounted.
.sp
.LP
-If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/vfstab\fR). If a file system's mount point is set to "legacy", \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system.
+If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/vfstab\fR). If a file system's mount point is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system.
.SS "Zones"
.sp
.LP
-A \fBZFS\fR file system can be added to a non-global zone by using zonecfg's "\fBadd fs\fR" subcommand. A \fBZFS\fR file system that is added to a non-global zone must have its mountpoint property set to legacy.
+A \fBZFS\fR file system can be added to a non-global zone by using the \fBzonecfg\fR \fBadd fs\fR subcommand. A \fBZFS\fR file system that is added to a non-global zone must have its \fBmountpoint\fR property set to \fBlegacy\fR.
.sp
.LP
The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted.
.sp
.LP
-A dataset can also be delegated to a non-global zone by using zonecfg's "\fBadd dataset\fR" subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the "quota" property is controlled by the global administrator.
+A dataset can also be delegated to a non-global zone by using \fBzonecfg\fR \fBadd dataset\fR subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the \fBquota\fR property is controlled by the global administrator.
.sp
.LP
-A \fBZFS\fR volume can be added as a device to a non-global zone by using zonecfg's "\fBadd device\fR" subcommand. However, its physical properties can only be modified by the global administrator.
+A \fBZFS\fR volume can be added as a device to a non-global zone by using \fBzonecfg\fR \fBadd device\fR subcommand. However, its physical properties can be modified only by the global administrator.
.sp
.LP
For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M).
.sp
.LP
-After a dataset is delegated to a non-global zone, the "zoned" property is automatically set. A zoned file system cannot be mounted in the global zone, since the zone administrator might have to set the mount point to an unacceptable value.
+After a dataset is delegated to a non-global zone, the \fBzoned\fR property is automatically set. A zoned file system cannot be mounted in the global zone, since the zone administrator might have to set the mount point to an unacceptable value.
.sp
.LP
-The global administrator can forcibly clear the "zoned" property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property.
+The global administrator can forcibly clear the \fBzoned\fR property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property.
.SS "Native Properties"
.sp
.LP
-Properties are divided into two types, native properties and user defined properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section.
+Properties are divided into two types, native and user-defined (or "user"). Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below.
.sp
.LP
-Every dataset has a set of properties that export statistics about the dataset as well as control various behavior. Properties are inherited from the parent unless overridden by the child. Some properties only apply to certain types of datasets (file systems, volumes or snapshots).
+Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots).
.sp
.LP
-The values of numeric properties can be specified using human-readable suffixes (for example, "k", "KB", "M", "Gb", etc, up to Z for zettabyte). The following are all valid (and equal) specifications:
+The values of numeric properties can be specified using human-readable suffixes (for example, \fBk\fR, \fBKB\fR, \fBM\fR, \fBGb\fR, and so forth, up to \fBZ\fR for zettabyte). The following are all valid (and equal) specifications:
.sp
.in +2
.nf
-"1536M", "1.5g", "1.50GB".
+1536M, 1.5g, 1.50GB
.fi
.in -2
.sp
.sp
.LP
-The values of non-numeric properties are case sensitive and must be lowercase, except for "mountpoint", "sharenfs" and "sharesmb".
+The values of non-numeric properties are case sensitive and must be lowercase, except for \fBmountpoint\fR, \fBsharenfs\fR, and \fBsharesmb\fR.
.sp
.LP
-The following native properties consist of read-only statistics about the dataset. These properties cannot be set, nor are they inherited. Native properties apply to all dataset types unless otherwise noted.
+The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted.
.sp
.ne 2
.mk
.na
-\fBavailable\fR
+\fB\fBavailable\fR\fR
.ad
.sp .6
.RS 4n
The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool.
.sp
-This property can also be referred to by its shortened column name, "avail".
+This property can also be referred to by its shortened column name, \fBavail\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBcompressratio\fR
+\fB\fBcompressratio\fR\fR
.ad
.sp .6
.RS 4n
-The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running "zfs set compression=on \fIdataset\fR". The default value is "off".
+The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBcreation\fR
+\fB\fBcreation\fR\fR
.ad
.sp .6
.RS 4n
@@ -360,18 +380,18 @@ The time this dataset was created.
.ne 2
.mk
.na
-\fBmounted\fR
+\fB\fBmounted\fR\fR
.ad
.sp .6
.RS 4n
-For file systems, indicates whether the file system is currently mounted. This property can be either "yes" or "no".
+For file systems, indicates whether the file system is currently mounted. This property can be either \fByes\fR or \fBno\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBorigin\fR
+\fB\fBorigin\fR\fR
.ad
.sp .6
.RS 4n
@@ -382,31 +402,31 @@ For cloned file systems or volumes, the snapshot from which the clone was create
.ne 2
.mk
.na
-\fBreferenced\fR
+\fB\fBreferenced\fR\fR
.ad
.sp .6
.RS 4n
The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical.
.sp
-This property can also be referred to by its shortened column name, "refer".
+This property can also be referred to by its shortened column name, \fBrefer\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBtype\fR
+\fB\fBtype\fR\fR
.ad
.sp .6
.RS 4n
-The type of dataset: "filesystem", "volume", or "snapshot".
+The type of dataset: \fBfilesystem\fR, \fBvolume\fR, or \fBsnapshot\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBused\fR
+\fB\fBused\fR\fR
.ad
.sp .6
.RS 4n
@@ -421,18 +441,18 @@ The amount of space used, available, or referenced does not take into account pe
.ne 2
.mk
.na
-\fBusedby*\fR
+\fB\fBusedby*\fR\fR
.ad
.sp .6
.RS 4n
-The \fBusedby*\fR snapshots decompose the "used" properties into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR +, \fBusedbysnapshots\fR. These properties are only available for datasets created on zpool "version 13" pools.
+The \fBusedby*\fR properties decompose the \fBused\fR properties into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR +, \fBusedbysnapshots\fR. These properties are only available for datasets created on \fBzpool\fR "version 13" pools.
.RE
.sp
.ne 2
.mk
.na
-\fBusedbychildren\fR
+\fB\fBusedbychildren\fR\fR
.ad
.sp .6
.RS 4n
@@ -443,7 +463,7 @@ The amount of space used by children of this dataset, which would be freed if al
.ne 2
.mk
.na
-\fBusedbydataset\fR
+\fB\fBusedbydataset\fR\fR
.ad
.sp .6
.RS 4n
@@ -454,7 +474,7 @@ The amount of space used by this dataset itself, which would be freed if the dat
.ne 2
.mk
.na
-\fBusedbyrefreservation\fR
+\fB\fBusedbyrefreservation\fR\fR
.ad
.sp .6
.RS 4n
@@ -465,24 +485,76 @@ The amount of space used by a \fBrefreservation\fR set on this dataset, which wo
.ne 2
.mk
.na
-\fBusedbysnapshots\fR
+\fB\fBusedbysnapshots\fR\fR
+.ad
+.sp .6
+.RS 4n
+The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBuserused@\fR\fIuser\fR\fR
+.ad
+.sp .6
+.RS 4n
+The amount of space referenced in this dataset by the specified user. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information.
+.sp
+Unprivileged users can access only their own space usage. The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage.
+.sp
+This property cannot be set on volumes, or on pools before version 15. The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIposix name\fR (for example, \fBjoe\fR)
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIposix numeric id\fR (for example, \fB789\fR)
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIsid name\fR (for example, \fBjoe.smith@mydomain\fR)
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIsid numeric id\fR (for example, \fBS-1-123-456-789\fR)
+.RE
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBgroupused@\fR\fIgroup\fR\fR
.ad
.sp .6
.RS 4n
-The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' "used" properties because space can be shared by multiple snapshots
+The amount of space referenced in this dataset by the specified group. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information.
+.sp
+Unprivileged users can only access the \fBgroupused@\fR... property for groups that they are a member of. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage.
.RE
.sp
.ne 2
.mk
.na
-\fBvolblocksize=\fIblocksize\fR\fR
+\fB\fBvolblocksize\fR=\fIblocksize\fR\fR
.ad
.sp .6
.RS 4n
For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid.
.sp
-This property can also be referred to by its shortened column name, "volblock".
+This property can also be referred to by its shortened column name, \fBvolblock\fR.
.RE
.sp
@@ -492,48 +564,48 @@ The following native properties can be used to change the behavior of a \fBZFS\f
.ne 2
.mk
.na
-\fBaclinherit=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR
+\fB\fBaclinherit\fR=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR
.ad
.sp .6
.RS 4n
-Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an "aclinherit" property of "\fBdiscard\fR" does not inherit any \fBACL\fR entries. A file system with an "aclinherit" property value of "\fBnoallow\fR" only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value "\fBrestricted\fR" (the default) removes the "\fBwrite_acl\fR" and "\fBwrite_owner\fR" permissions when the \fBACL\fR entry is inherited. A file system with an "aclinherit" property value of "\fBpassthrough\fR" inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an "aclinherit" property value of "\fBpassthrough-x\fR" has the same meaning as "\fBpassthrough\fR", except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit.
+Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an \fBaclinherit\fR property of \fBdiscard\fR does not inherit any \fBACL\fR entries. A file system with an \fBaclinherit\fR property value of \fBnoallow\fR only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value \fBrestricted\fR (the default) removes the \fBwrite_acl\fR and \fBwrite_owner\fR permissions when the \fBACL\fR entry is inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough\fR inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough-x\fR has the same meaning as \fBpassthrough\fR, except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit.
.sp
-When the property value is set to "\fBpassthrough\fR," files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance to the requested mode from the application.
+When the property value is set to \fBpassthrough\fR, files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance to the requested mode from the application.
.RE
.sp
.ne 2
.mk
.na
-\fBaclmode=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR
+\fB\fBaclmode\fR=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR
.ad
.sp .6
.RS 4n
-Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an "aclmode" property of "\fBdiscard\fR" deletes all \fBACL\fR entries that do not represent the mode of the file. An "aclmode" property of "\fBgroupmask\fR" (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an "aclmode" property of "\fBpassthrough\fR" indicates that no changes are made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory.
+Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an \fBaclmode\fR property of \fBdiscard\fR deletes all \fBACL\fR entries that do not represent the mode of the file. An \fBaclmode\fR property of \fBgroupmask\fR (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an \fBaclmode\fR property of \fBpassthrough\fR indicates that no changes are made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory.
.RE
.sp
.ne 2
.mk
.na
-\fBatime=\fIon\fR | \fIoff\fR\fR
+\fB\fBatime\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is "on".
+Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBcanmount=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR
+\fB\fBcanmount\fR=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR
.ad
.sp .6
.RS 4n
-If this property is set to "\fBoff\fR", the file system cannot be mounted, and is ignored by "\fBzfs mount -a\fR". Setting this property to "\fBoff\fR" is similar to setting the "mountpoint" property to "\fBnone\fR", except that the dataset still has a normal "mountpoint" property, which can be inherited. Setting this property to "\fBoff\fR" allows datasets to be used solely as a mechanism to inherit properties. One example of setting canmount=\fBoff\fR is to have two datasets with the same mountpoint, so that the children of both datasets appear in the same directory, but might have different inherited characteristics.
+If this property is set to \fBoff\fR, the file system cannot be mounted, and is ignored by \fBzfs mount -a\fR. Setting this property to \fBoff\fR is similar to setting the \fBmountpoint\fR property to \fBnone\fR, except that the dataset still has a normal \fBmountpoint\fR property, which can be inherited. Setting this property to \fBoff\fR allows datasets to be used solely as a mechanism to inherit properties. One example of setting \fBcanmount=\fR\fBoff\fR is to have two datasets with the same \fBmountpoint\fR, so that the children of both datasets appear in the same directory, but might have different inherited characteristics.
.sp
-When the "\fBnoauto\fR" option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the "\fBzfs mount -a\fR" command or unmounted by the "\fBzfs unmount -a\fR" command.
+When the \fBnoauto\fR option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the \fBzfs mount -a\fR command or unmounted by the \fBzfs unmount -a\fR command.
.sp
This property is not inherited.
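+.sp
+For example, two hypothetical datasets could share the same \fBmountpoint\fR so that only their children are mounted there:
+.sp
+.in +2
+.nf
+# zfs create -o canmount=off -o mountpoint=/export/home pool1/home
+# zfs create -o canmount=off -o mountpoint=/export/home pool2/home
+.fi
+.in -2
+.sp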
.RE
@@ -542,22 +614,22 @@ This property is not inherited.
.ne 2
.mk
.na
-\fBchecksum=\fIon\fR | \fIoff\fR | \fIfletcher2\fR, | \fIfletcher4\fR | \fIsha256\fR\fR
+\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR\fR
.ad
.sp .6
.RS 4n
-Controls the checksum used to verify data integrity. The default value is "on", which automatically selects an appropriate algorithm (currently, \fIfletcher2\fR, but this may change in future releases). The value "off" disables integrity checking on user data. Disabling checksums is NOT a recommended practice.
+Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher2\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice.
.RE
.sp
.ne 2
.mk
.na
-\fBcompression=\fIon\fR | \fIoff\fR | \fIlzjb\fR | \fIgzip\fR | \fIgzip-N\fR\fR
+\fB\fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | \fBgzip-\fR\fIN\fR\fR
.ad
.sp .6
.RS 4n
-Controls the compression algorithm used for this dataset. The "lzjb" compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip" compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the "gzip" level by using the value "gzip-\fIN\fR" where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
+Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to \fBon\fR uses the \fBlzjb\fR compression algorithm. The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR (which is also the default for \fBgzip\fR(1)).
.sp
This property can also be referred to by its shortened column name "compress".
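+.sp
+For example, maximum \fBgzip\fR compression could be enabled on a hypothetical dataset \fBtank/logs\fR as follows:
+.sp
+.in +2
+.nf
+# zfs set compression=gzip-9 tank/logs
+.fi
+.in -2
+.sp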
.RE
@@ -570,118 +642,172 @@ This property can also be referred to by its shortened column name "compress".
.ad
.sp .6
.RS 4n
-Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or raid-z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the "used" property and counting against quotas and reservations.
+Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or \fBraid-z\fR. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations.
.sp
-Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the "\fB-o\fR copies=" option.
+Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR option.
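+.sp
+For example, a hypothetical dataset could be created with two copies of its data as follows:
+.sp
+.in +2
+.nf
+# zfs create -o copies=2 tank/important
+.fi
+.in -2
+.sp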
.RE
.sp
.ne 2
.mk
.na
-\fBdevices=\fIon\fR | \fIoff\fR\fR
+\fB\fBdevices\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether device nodes can be opened on this file system. The default value is "on".
+Controls whether device nodes can be opened on this file system. The default value is \fBon\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBexec=\fIon\fR | \fIoff\fR\fR
+\fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether processes can be executed from within this file system. The default value is "on".
+Controls whether processes can be executed from within this file system. The default value is \fBon\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBmountpoint=\fIpath\fR | \fInone\fR | \fIlegacy\fR\fR
+\fB\fBmountpoint\fR=\fIpath\fR | \fBnone\fR | \fBlegacy\fR\fR
.ad
.sp .6
.RS 4n
Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used.
.sp
-When the mountpoint property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is "legacy", then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously "legacy" or "none", or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location.
+When the \fBmountpoint\fR property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is \fBlegacy\fR, then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously \fBlegacy\fR or \fBnone\fR, or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location.
.RE
.sp
.ne 2
.mk
.na
-\fBnbmand=\fIon\fR | \fIoff\fR\fR
+\fB\fBnbmand\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the file system should be mounted with "\fBnbmand\fR" (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is umounted and remounted. See \fBmount\fR(1M) for more information on "\fBnbmand\fR" mounts.
+Controls whether the file system should be mounted with \fBnbmand\fR (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is unmounted and remounted. See \fBmount\fR(1M) for more information on \fBnbmand\fR mounts.
.RE
.sp
.ne 2
.mk
.na
-\fBprimarycache=\fIall\fR | \fInone\fR | \fImetadata\fR\fR
+\fB\fBprimarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR
.ad
.sp .6
.RS 4n
-Controls what is cached in the primary cache (ARC). If this property is set to "all", then both user data and metadata is cached. If this property is set to "none", then neither user data nor metadata is cached. If this property is set to "metadata", then only metadata is cached. The default value is "all".
+Controls what is cached in the primary cache (ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBquota=\fIsize\fR | \fInone\fR\fR
+\fB\fBquota\fR=\fIsize\fR | \fBnone\fR\fR
.ad
.sp .6
.RS 4n
Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit.
.sp
-Quotas cannot be set on volumes, as the "volsize" property acts as an implicit quota.
+Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implicit quota.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBuserquota@\fR\fIuser\fR=\fIsize\fR | \fBnone\fR\fR
+.ad
+.sp .6
+.RS 4n
+Limits the amount of space referenced by the specified user, as identified by the \fBuserused@\fR\fIuser\fR property.
+.sp
+Enforcement of user quotas may be delayed by several seconds. In other words, users may go a bit over their quota before the system notices that they are over quota and begins to refuse additional writes with \fBEDQUOT\fR. See the \fBzfs userspace\fR subcommand for more information.
+.sp
+Unprivileged users can get only their own quota. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota.
+.sp
+This property cannot be set on volumes, on filesystems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIposix name\fR (for example, \fBjoe\fR)
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIposix numeric id\fR (for example, \fB789\fR)
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIsid name\fR (for example, \fBjoe.smith@mydomain\fR)
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fIsid numeric id\fR (for example, \fBS-1-123-456-789\fR)
+.RE
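+.sp
+For example, a 50 Gbyte quota could be set for a hypothetical user \fBjoe\fR on \fBtank/home\fR as follows:
+.sp
+.in +2
+.nf
+# zfs set userquota@joe=50G tank/home
+.fi
+.in -2
+.sp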
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBgroupquota@\fR\fIgroup\fR=\fIsize\fR | \fBnone\fR\fR
+.ad
+.sp .6
+.RS 4n
+Limits the amount of space referenced by the specified group. See the \fBuserquota@\fR\fIuser\fR property for more information.
+.sp
+Unprivileged users can only get the quota of groups they are a member of. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas.
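+.sp
+For example, a 100 Gbyte quota could be set for a hypothetical group \fBstaff\fR as follows:
+.sp
+.in +2
+.nf
+# zfs set groupquota@staff=100G tank/home
+.fi
+.in -2
+.sp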
.RE
.sp
.ne 2
.mk
.na
-\fBreadonly=\fIon\fR | \fIoff\fR\fR
+\fB\fBreadonly\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether this dataset can be modified. The default value is "off".
+Controls whether this dataset can be modified. The default value is \fBoff\fR.
.sp
-This property can also be referred to by its shortened column name, "rdonly".
+This property can also be referred to by its shortened column name, \fBrdonly\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBrecordsize=\fIsize\fR\fR
+\fB\fBrecordsize\fR=\fIsize\fR\fR
.ad
.sp .6
.RS 4n
Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical access patterns.
.sp
-For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a "recordsize" greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance.
+For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a \fBrecordsize\fR greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance.
.sp
The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes.
.sp
-Changing the file system's \fBrecordsize\fR only affects files created afterward; existing files are unaffected.
+Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected.
.sp
-This property can also be referred to by its shortened column name, "recsize".
+This property can also be referred to by its shortened column name, \fBrecsize\fR.
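+.sp
+For example, a hypothetical database dataset that accesses its files in 8 Kbyte records could be tuned as follows:
+.sp
+.in +2
+.nf
+# zfs set recordsize=8K tank/db
+.fi
+.in -2
+.sp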
.RE
.sp
.ne 2
.mk
.na
-\fBrefquota=\fIsize\fR | \fInone\fR\fR
+\fB\fBrefquota\fR=\fIsize\fR | \fBnone\fR\fR
.ad
.sp .6
.RS 4n
@@ -692,7 +818,7 @@ Limits the amount of space a dataset can consume. This property enforces a hard
.ne 2
.mk
.na
-\fBrefreservation=\fIsize\fR | \fInone\fR\fR
+\fB\fBrefreservation\fR=\fIsize\fR | \fBnone\fR\fR
.ad
.sp .6
.RS 4n
@@ -700,66 +826,66 @@ The minimum amount of space guaranteed to a dataset, not including its descenden
.sp
If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset.
.sp
-This property can also be referred to by its shortened column name, "refreserv".
+This property can also be referred to by its shortened column name, \fBrefreserv\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBreservation=\fIsize\fR | \fInone\fR\fR
+\fB\fBreservation\fR=\fIsize\fR | \fBnone\fR\fR
.ad
.sp .6
.RS 4n
The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations.
.sp
-This property can also be referred to by its shortened column name, "reserv".
+This property can also be referred to by its shortened column name, \fBreserv\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBsecondarycache=\fIall\fR | \fInone\fR | \fImetadata\fR\fR
+\fB\fBsecondarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR
.ad
.sp .6
.RS 4n
-Controls what is cached in the secondary cache (L2ARC). If this property is set to "all", then both user data and metadata is cached. If this property is set to "none", then neither user data nor metadata is cached. If this property is set to "metadata", then only metadata is cached. The default value is "all".
+Controls what is cached in the secondary cache (L2ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBsetuid=\fIon\fR | \fIoff\fR\fR
+\fB\fBsetuid\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is "on".
+Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is \fBon\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBshareiscsi=\fIon\fR | \fIoff\fR\fR
+\fB\fBshareiscsi\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Like the "sharenfs" property, "shareiscsi" indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are "on", "off", and "type=disk". The default value is "off". In the future, other target types might be supported. For example, "tape".
+Like the \fBsharenfs\fR property, \fBshareiscsi\fR indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are \fBon\fR, \fBoff\fR, and \fBtype=disk\fR. The default value is \fBoff\fR. In the future, other target types might be supported. For example, \fBtape\fR.
.sp
-You might want to set "shareiscsi=on" for a file system so that all \fBZFS\fR volumes within the file system are shared by default. Setting this property on a file system has no direct effect, however.
+You might want to set \fBshareiscsi=on\fR for a file system so that all \fBZFS\fR volumes within the file system are shared by default. However, setting this property on a file system has no direct effect.
.RE
.sp
.ne 2
.mk
.na
-\fBsharesmb=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
+\fB\fBsharesmb\fR=\fBon\fR | \fBoff\fR | \fIopts\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the file system is shared by using the Solaris \fBCIFS\fR service, and what options are to be used. A file system with the "\fBsharesmb\fR" property set to "off" is managed through traditional tools such as \fBsharemgr\fR(1M). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBsharemgr\fR(1M) command is invoked with no options. Otherwise, the \fBsharemgr\fR(1M) command is invoked with options equivalent to the contents of this property.
+Controls whether the file system is shared by using the Solaris \fBCIFS\fR service, and what options are to be used. A file system with the \fBsharesmb\fR property set to \fBoff\fR is managed through traditional tools such as \fBsharemgr\fR(1M). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBsharemgr\fR(1M) command is invoked with no options. Otherwise, the \fBsharemgr\fR(1M) command is invoked with options equivalent to the contents of this property.
.sp
Because \fBSMB\fR shares requires a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be illegal in the resource name, are replaced with underscore (\fB_\fR) characters. A pseudo property "name" is also supported that allows you to replace the data set name with a specified name. The specified name is then used to replace the prefix dataset in the case of inheritance. For example, if the dataset \fBdata/home/john\fR is set to \fBname=john\fR, then \fBdata/home/john\fR has a resource name of \fBjohn\fR. If a child dataset of \fBdata/home/john/backups\fR, it has a resource name of \fBjohn_backups\fR.
.sp
@@ -772,42 +898,42 @@ When the \fBsharesmb\fR property is changed for a dataset, the dataset and any c
.ne 2
.mk
.na
-\fBsharenfs=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
+\fB\fBsharenfs\fR=\fBon\fR | \fBoff\fR | \fIopts\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a"\fBsharenfs\fR" property of "off" is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the "\fBzfs share\fR" and "\fBzfs unshare\fR" commands. If the property is set to "on", the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
+Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a \fBsharenfs\fR property of \fBoff\fR is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
.sp
-When the "sharenfs" property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is "off", the file systems are unshared.
+When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously \fBoff\fR, or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared.
.RE
.sp
.ne 2
.mk
.na
-\fBsnapdir=\fIhidden\fR | \fIvisible\fR\fR
+\fB\fBsnapdir\fR=\fBhidden\fR | \fBvisible\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the ".zfs" directory is hidden or visible in the root of the file system as discussed in the "Snapshots" section. The default value is "hidden".
+Controls whether the \fB\&.zfs\fR directory is hidden or visible in the root of the file system as discussed in the "Snapshots" section. The default value is \fBhidden\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBversion=\fB1\fR|\fB2\fR|\fBcurrent\fR\fR
+\fB\fBversion\fR=\fB1\fR | \fB2\fR | \fBcurrent\fR\fR
.ad
.sp .6
.RS 4n
-The on-disk version of this file system, which is independent of the pool version. This property can only be set to later supported versions. See "\fBzfs upgrade\fR".
+The on-disk version of this file system, which is independent of the pool version. This property can only be set to later supported versions. See the \fBzfs upgrade\fR command.
.RE
.sp
.ne 2
.mk
.na
-\fBvolsize=\fIsize\fR\fR
+\fB\fBvolsize\fR=\fIsize\fR\fR
.ad
.sp .6
.RS 4n
@@ -815,18 +941,18 @@ For volumes, specifies the logical size of the volume. By default, creating a vo
.sp
The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size.
.sp
-Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the "\fBzfs create -V\fR" command, or by changing the reservation after the volume has been created. A "sparse volume" is a volume where the reservation is less then the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation.
+Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the \fBzfs create -V\fR command, or by changing the reservation after the volume has been created. A "sparse volume" is a volume where the reservation is less than the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation.
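+.sp
+For example, a hypothetical 10 Gbyte sparse volume could be created as follows:
+.sp
+.in +2
+.nf
+# zfs create -s -V 10G tank/sparsevol
+.fi
+.in -2
+.sp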
.RE
.sp
.ne 2
.mk
.na
-\fBvscan=\fBon\fR|\fBoff\fR\fR
+\fB\fBvscan\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether regular files should be scanned for viruses when a file is opened and closed. In addition to enabling this property, the virus scan service must also be enabled for virus scanning to occur. The default value is "off".
+Controls whether regular files should be scanned for viruses when a file is opened and closed. In addition to enabling this property, the virus scan service must also be enabled for virus scanning to occur. The default value is \fBoff\fR.
.RE
.sp
@@ -837,65 +963,65 @@ Controls whether regular files should be scanned for viruses when a file is open
.ad
.sp .6
.RS 4n
-Controls whether extended attributes are enabled for this file system. The default value is "\fBon\fR".
+Controls whether extended attributes are enabled for this file system. The default value is \fBon\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBzoned=\fIon\fR | \fIoff\fR\fR
+\fB\fBzoned\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is "off".
+Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is \fBoff\fR.
.RE
.sp
.LP
-The following three properties cannot be changed after the file system is created, and therefore, should be set when the file system is created. If the properties are not set with the "\fBzfs create\fR" or "\fBzpool create\fR" commands, these properties are inherited from the parent dataset. If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties.
+The following three properties cannot be changed after the file system is created, and therefore, should be set when the file system is created. If the properties are not set with the \fBzfs create\fR or \fBzpool create\fR commands, these properties are inherited from the parent dataset. If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties.
.sp
.ne 2
.mk
.na
-\fBcasesensitivity = \fBsensitive\fR | \fBinsensitive\fR | \fBmixed\fR\fR
+\fB\fBcasesensitivity\fR=\fBsensitive\fR | \fBinsensitive\fR | \fBmixed\fR\fR
.ad
.sp .6
.RS 4n
-Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the "\fBcasesensitivity\fR" property is "\fBsensitive\fR." Traditionally, UNIX and POSIX file systems have case-sensitive file names.
+Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the \fBcasesensitivity\fR property is \fBsensitive\fR. Traditionally, UNIX and POSIX file systems have case-sensitive file names.
.sp
-The "\fBmixed\fR" value for the "\fBcasesensitivity\fR" property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. Currently, case-insensitive matching behavior on a file system that supports mixed behavior is limited to the Solaris CIFS server product. For more information about the "mixed" value behavior, see the \fIZFS Administration Guide\fR.
+The \fBmixed\fR value for the \fBcasesensitivity\fR property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. Currently, case-insensitive matching behavior on a file system that supports mixed behavior is limited to the Solaris CIFS server product. For more information about the \fBmixed\fR value behavior, see the \fISolaris ZFS Administration Guide\fR.
.RE
.sp
.ne 2
.mk
.na
-\fBnormalization =\fBnone\fR | \fBformD\fR | \fBformKCf\fR\fR
+\fB\fBnormalization\fR=\fBnone\fR | \fBformD\fR | \fBformKCf\fR\fR
.ad
.sp .6
.RS 4n
-Indicates whether the file system should perform a \fBunicode\fR normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified, names are normalized as part of any comparison process. If this property is set to a legal value other than "\fBnone\fR," and the "\fButf8only\fR" property was left unspecified, the "\fButf8only\fR" property is automatically set to "\fBon\fR." The default value of the "\fBnormalization\fR" property is "\fBnone\fR." This property cannot be changed after the file system is created.
+Indicates whether the file system should perform a \fBunicode\fR normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified; names are normalized as part of any comparison process. If this property is set to a legal value other than \fBnone\fR, and the \fButf8only\fR property was left unspecified, the \fButf8only\fR property is automatically set to \fBon\fR. The default value of the \fBnormalization\fR property is \fBnone\fR. This property cannot be changed after the file system is created.
.RE
.sp
.ne 2
.mk
.na
-\fButf8only =\fBon\fR | \fBoff\fR\fR
+\fB\fButf8only\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
-Indicates whether the file system should reject file names that include characters that are not present in the \fBUTF-8\fR character code set. If this property is explicitly set to "\fBoff\fR," the normalization property must either not be explicitly set or be set to "\fBnone\fR." The default value for the "\fButf8only\fR" property is "off." This property cannot be changed after the file system is created.
+Indicates whether the file system should reject file names that include characters that are not present in the \fBUTF-8\fR character code set. If this property is explicitly set to \fBoff\fR, the normalization property must either not be explicitly set or be set to \fBnone\fR. The default value for the \fButf8only\fR property is \fBoff\fR. This property cannot be changed after the file system is created.
.RE
.sp
.LP
-The "\fBcasesensitivity\fR," "\fBnormalization\fR," and "\fButf8only\fR" properties are also new permissions that can be assigned to non-privileged users by using the \fBZFS\fR delegated administration feature.
+The \fBcasesensitivity\fR, \fBnormalization\fR, and \fButf8only\fR properties are also new permissions that can be assigned to non-privileged users by using the \fBZFS\fR delegated administration feature.
.SS "Temporary Mount Point Properties"
.sp
.LP
-When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the "\fBzfs mount\fR" command for normal file systems, its mount options are set according to its properties. The correlation between properties and mount options is as follows:
+When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the \fBzfs mount\fR command for normal file systems, its mount options are set according to its properties. The correlation between properties and mount options is as follows:
.sp
.in +2
.nf
@@ -911,20 +1037,20 @@ When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts
.sp
.LP
-In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fB-nosuid\fR option is an alias for "nodevices,nosetuid". These properties are reported as "temporary" by the "\fBzfs get\fR" command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings.
+In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fB-nosuid\fR option is an alias for \fBnodevices,nosetuid\fR. These properties are reported as "temporary" by the \fBzfs get\fR command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings.
.SS "User Properties"
.sp
.LP
In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but applications or administrators can use them to annotate datasets (file systems, volumes, and snapshots).
.sp
.LP
-User property names must contain a colon (":") character to distinguish them from native properties. They may contain lowercase letters, numbers, and the following punctuation characters: colon (":"), dash ("-"), period ("."), and underscore ("_"). The expected convention is that the property name is divided into two portions such as "\fImodule\fR:\fIproperty\fR", but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters, and cannot begin with a dash ("-").
+User property names must contain a colon (\fB:\fR) character to distinguish them from native properties. They may contain lowercase letters, numbers, and the following punctuation characters: colon (\fB:\fR), dash (\fB-\fR), period (\fB\&.\fR), and underscore (\fB_\fR). The expected convention is that the property name is divided into two portions such as \fImodule\fR\fB:\fR\fIproperty\fR, but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters, and cannot begin with a dash (\fB-\fR).
.sp
.LP
-When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for different purposes. Property names beginning with "com.sun." are reserved for use by Sun Microsystems.
+When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for different purposes. Property names beginning with \fBcom.sun.\fR are reserved for use by Sun Microsystems.
.sp
.LP
-The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties ("zfs list", "zfs get", "zfs set", etc.) can be used to manipulate both native properties and user properties. Use the "\fBzfs inherit\fR" command to clear a user property . If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters.
+The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties (\fBzfs list\fR, \fBzfs get\fR, \fBzfs set\fR, and so forth) can be used to manipulate both native properties and user properties. Use the \fBzfs inherit\fR command to clear a user property. If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters.
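+.sp
+.LP
+For example, a hypothetical user property (the property name, value, and dataset are illustrative only) could be set and later cleared as follows:
+.sp
+.in +2
+.nf
+# zfs set info.example:department=accounting tank/projects
+# zfs inherit info.example:department tank/projects
+.fi
+.in -2
+.sp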
.SS "ZFS Volumes as Swap or Dump Devices"
.sp
.LP
@@ -964,7 +1090,7 @@ Creates a new \fBZFS\fR file system. The file system is automatically mounted ac
.ad
.sp .6
.RS 4n
-Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully.
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully.
.RE
.sp
@@ -975,7 +1101,7 @@ Creates all the non-existing parent datasets. Datasets created in this manner ar
.ad
.sp .6
.RS 4n
-Sets the specified property as if "\fBzfs set property=value\fR" was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
+Sets the specified property as if the command \fBzfs set \fIproperty\fR=\fIvalue\fR\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
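+.sp
+For example, a hypothetical file system could be created with several properties set at creation time as follows:
+.sp
+.in +2
+.nf
+# zfs create -o atime=off -o compression=on tank/home/joe
+.fi
+.in -2
+.sp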
.RE
.RE
@@ -999,7 +1125,7 @@ Creates a volume of the given size. The volume is exported as a block device in
.ad
.sp .6
.RS 4n
-Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully.
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully.
.RE
.sp
@@ -1010,7 +1136,7 @@ Creates all the non-existing parent datasets. Datasets created in this manner ar
.ad
.sp .6
.RS 4n
-Creates a sparse volume with no reservation. See "volsize" in the Native Properties section for more information about sparse volumes.
+Creates a sparse volume with no reservation. See \fBvolsize\fR in the Native Properties section for more information about sparse volumes.
.RE
.sp
@@ -1021,7 +1147,7 @@ Creates a sparse volume with no reservation. See "volsize" in the Native Propert
.ad
.sp .6
.RS 4n
-Sets the specified property as if "\fBzfs set property=value\fR" was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
+Sets the specified property as if the \fBzfs set \fIproperty\fR=\fIvalue\fR\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
.RE
.sp
@@ -1032,7 +1158,7 @@ Sets the specified property as if "\fBzfs set property=value\fR" was invoked at
.ad
.sp .6
.RS 4n
-Equivalent to "\fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR". If this option is specified in conjunction with "\fB\fR\fB-o\fR \fBvolblocksize\fR", the resulting behavior is undefined.
+Equivalent to \fB-o\fR \fBvolblocksize\fR=\fIblocksize\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined.
.RE
.RE
@@ -1076,7 +1202,7 @@ Recursively destroy all dependents, including cloned file systems outside the ta
.ad
.sp .6
.RS 4n
-Force an unmount of any file systems using the "\fBunmount -f\fR" command. This option has no effect on non-file systems or unmounted file systems.
+Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems.
.RE
Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use.
@@ -1090,7 +1216,7 @@ Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR o
.ad
.sp .6
.RS 4n
-Creates a snapshot with the given name. See the "Snapshots" section for details.
+Creates a snapshot with the given name. All previous modifications by successful system calls to the file system are part of the snapshot. See the "Snapshots" section for details.
.sp
.ne 2
.mk
@@ -1110,7 +1236,7 @@ Recursively create snapshots of all descendent datasets. Snapshots are taken ato
.ad
.sp .6
.RS 4n
-Sets the specified property; see "\fBzfs create\fR" for details.
+Sets the specified property; see \fBzfs create\fR for details.
.RE
.RE
@@ -1176,7 +1302,7 @@ Creates a clone of the given snapshot. See the "Clones" section for details. The
.ad
.sp .6
.RS 4n
-Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent. If the target filesystem or volume already exists, the operation completes successfully.
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. If the target filesystem or volume already exists, the operation completes successfully.
.RE
.sp
@@ -1187,7 +1313,7 @@ Creates all the non-existing parent datasets. Datasets created in this manner ar
.ad
.sp .6
.RS 4n
-Sets the specified property; see "\fBzfs create\fR" for details.
+Sets the specified property; see \fBzfs create\fR for details.
.RE
.RE
@@ -1200,9 +1326,9 @@ Sets the specified property; see "\fBzfs create\fR" for details.
.ad
.sp .6
.RS 4n
-Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the "origin" file system becomes a clone of the specified file system.
+Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the origin file system becomes a clone of the specified file system.
.sp
-The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the "origin" file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The "\fBrename\fR" subcommand can be used to rename any conflicting snapshots.
+The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the origin file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The \fBrename\fR subcommand can be used to rename any conflicting snapshots.
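+.sp
+For example, a hypothetical clone could be promoted, making its origin file system eligible for destruction:
+.sp
+.in +2
+.nf
+# zfs clone tank/fs@snap tank/newfs
+# zfs promote tank/newfs
+.fi
+.in -2
+.sp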
.RE
.sp
@@ -1230,7 +1356,7 @@ Renames the given dataset. The new target can be located anywhere in the \fBZFS\
.ad
.sp .6
.RS 4n
-Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent.
+Creates all the nonexistent parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent.
.RE
.RE
@@ -1250,15 +1376,11 @@ Recursively rename the snapshots of all descendent datasets. Snapshots are the o
.ne 2
.mk
.na
-\fB\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIproperty\fR[,\fI\&...\fR]] [ \fB-t\fR \fItype\fR[,\fI\&...\fR]]\fR
-.ad
-.br
-.na
-\fB[ \fB-s\fR \fIproperty\fR ] ... [ \fB-S\fR \fIproperty\fR ] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ...\fR
+\fB\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,\fI\&...\fR]] [ \fB-t\fR \fItype\fR[,\fI\&...\fR]] [ \fB-s\fR \fIproperty\fR ] ... [ \fB-S\fR \fIproperty\fR ] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ...\fR
.ad
.sp .6
.RS 4n
-Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the "listsnaps" property is "on" (the default is "off") . The following fields are displayed:
+Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR). The following fields are displayed:
.sp
.in +2
.nf
@@ -1293,6 +1415,17 @@ Recursively display any children of the dataset on the command line.
.ne 2
.mk
.na
+\fB\fB-d\fR \fIdepth\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively display any children of the dataset, limiting the recursion to \fIdepth\fR. A depth of \fB1\fR will display only the dataset and its direct children.
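+.sp
+For example, a hypothetical pool and only its top-level datasets could be listed as follows:
+.sp
+.in +2
+.nf
+# zfs list -d 1 tank
+.fi
+.in -2
+.sp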
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fB-o\fR \fIproperty\fR\fR
.ad
.sp .6
@@ -1302,25 +1435,34 @@ A comma-separated list of properties to display. The property must be:
.TP
.ie t \(bu
.el o
-one of the properties described in the "Native Properties" section.
+one of the properties described in the "Native Properties" section
.RE
.RS +4
.TP
.ie t \(bu
.el o
-a user property.
+a user property
.RE
.RS +4
.TP
.ie t \(bu
.el o
-the value "name" to display the dataset name.
+the value \fBname\fR to display the dataset name
.RE
.RS +4
.TP
.ie t \(bu
.el o
-the value "space" to display space usage properties on file systems and volumes. This is a shortcut for "\fB-o name,avail,used,usedsnap,usedds, usedrefreserv,usedchild -t filesystem,volume\fR".
+the value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for:
+.sp
+.in +2
+.nf
+-o name,avail,used,usedsnap,usedds,usedrefreserv,\e
+usedchild -t filesystem,volume
+.fi
+.in -2
+.sp
+
.RE
.RE
@@ -1332,7 +1474,7 @@ the value "space" to display space usage properties on file systems and volumes.
.ad
.sp .6
.RS 4n
-A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value "name" to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
+A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
.sp
The following is a list of sorting criteria:
.RS +4
@@ -1357,7 +1499,7 @@ Types inappropriate for a row sort that row to the literal bottom, regardless of
.TP
.ie t \(bu
.el o
-If no sorting options are specified the existing behavior of "\fBzfs list\fR" is preserved.
+If no sorting options are specified, the existing behavior of \fBzfs list\fR is preserved.
.RE
.RE
@@ -1380,7 +1522,7 @@ Same as the \fB-s\fR option, but sorts by property in descending order.
.ad
.sp .6
.RS 4n
-A comma-separated list of types to display, where "type" is one of "filesystem", "snapshot" , "volume" or "all". For example, specifying "\fB-t snapshot\fR" displays only snapshots.
+A comma-separated list of types to display, where \fItype\fR is one of \fBfilesystem\fR, \fBsnapshot\fR, \fBvolume\fR, or \fBall\fR. For example, specifying \fB-t snapshot\fR displays only snapshots.
.RE
.RE
@@ -1393,14 +1535,14 @@ A comma-separated list of types to display, where "type" is one of "filesystem",
.ad
.sp .6
.RS 4n
-Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of "B", "K", "M", "G", "T", "P", "E", "Z" (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). Properties cannot be set on snapshots.
+Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). Properties cannot be set on snapshots.
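+.sp
+For example, a 20 Gbyte quota could be set on a hypothetical dataset by using a suffixed value:
+.sp
+.in +2
+.nf
+# zfs set quota=20G tank/home/joe
+.fi
+.in -2
+.sp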
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs get\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,...] [\fB-s\fR \fIsource\fR[,...] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR
+\fB\fBzfs get\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-Hp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIsource\fR[,...]] \fBall\fR | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR
.ad
.sp .6
.RS 4n
@@ -1419,7 +1561,7 @@ Displays properties for the given datasets. If no datasets are specified, then t
All columns are displayed by default, though this can be controlled by using the \fB-o\fR option. This command takes a comma-separated list of properties as described in the "Native Properties" and "User Properties" sections.
.sp
-The special value "all" can be used to display all properties that apply to the given dataset's type (filesystem, volume or snapshot).
+The special value \fBall\fR can be used to display all properties that apply to the given dataset's type (filesystem, volume, or snapshot).
.sp
.ne 2
.mk
@@ -1435,6 +1577,17 @@ Recursively display properties for any children.
.ne 2
.mk
.na
+\fB\fB-d\fR \fIdepth\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively display any children of the dataset, limiting the recursion to \fIdepth\fR. A depth of \fB1\fR will display only the dataset and its direct children.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fB-H\fR\fR
.ad
.sp .6
@@ -1450,7 +1603,7 @@ Display output in a form more easily parsed by scripts. Any headers are omitted,
.ad
.sp .6
.RS 4n
-A comma-separated list of columns to display. "name,property,value,source" is the default value.
+A comma-separated list of columns to display. \fBname,property,value,source\fR is the default value.
.RE
.sp
@@ -1461,7 +1614,7 @@ A comma-separated list of columns to display. "name,property,value,source" is th
.ad
.sp .6
.RS 4n
-A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: "local,default,inherited,temporary,none". The default value is all sources.
+A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: \fBlocal,default,inherited,temporary,none\fR. The default value is all sources.
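+.sp
+For example, only the locally set properties of a hypothetical pool and its descendents could be displayed as follows:
+.sp
+.in +2
+.nf
+# zfs get -r -s local all tank
+.fi
+.in -2
+.sp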
.RE
.sp
@@ -1472,7 +1625,7 @@ A comma-separated list of sources to display. Those properties coming from a sou
.ad
.sp .6
.RS 4n
-Display numbers in parsable (exact) values.
+Display numbers in parseable (exact) values.
.RE
.RE
@@ -1518,11 +1671,11 @@ Displays a list of file systems that are not the most recent version.
.ad
.sp .6
.RS 4n
-Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. "\fBzfs send\fR" streams generated from new snapshots of these file systems can not be accessed on systems running older versions of the software.
+Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems cannot be accessed on systems running older versions of the software.
.sp
-The file system version is independent of the pool version (see \fBzpool\fR(1M) for information on the "\fBzpool upgrade\fR" command).
+The file system version is independent of the pool version (see \fBzpool\fR(1M) for information on the \fBzpool upgrade\fR command).
.sp
-The file system version does not have to be upgraded when the pool version is upgraded, and vice versa.
+The file system version does not have to be upgraded when the pool version is upgraded, and vice-versa.
.sp
.ne 2
.mk
@@ -1573,6 +1726,159 @@ Upgrade to the specified \fIversion\fR. If the \fB-V\fR flag is not specified, t
.ne 2
.mk
.na
+\fB\fBzfs userspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR]... [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR | \fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays space consumed by, and quotas on, each user in the specified filesystem or snapshot. This corresponds to the \fBuserused@\fR\fIuser\fR and \fBuserquota@\fR\fIuser\fR properties.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-n\fR\fR
+.ad
+.sp .6
+.RS 4n
+Print numeric ID instead of user/group name.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.sp .6
+.RS 4n
+Do not print headers; use tab-delimited output.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-p\fR\fR
+.ad
+.sp .6
+.RS 4n
+Use exact (parseable) numeric output.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIfield\fR[,...]\fR
+.ad
+.sp .6
+.RS 4n
+Display only the specified fields, from the following set:
+.sp
+.in +2
+.nf
+type,name,used,quota
+.fi
+.in -2
+.sp
+
+The default is to display all fields.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR \fIfield\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sort output by this field. The \fB-s\fR and \fB-S\fR flags may be specified multiple times to sort first by one field, then by another. The default is:
+.sp
+.in +2
+.nf
+-s type -s name
+.fi
+.in -2
+.sp
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-S\fR \fIfield\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sort by this field in reverse order. See \fB-s\fR.
+.RE
+
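Repeated -s and -S flags build up a chain of sort keys, applied in the order given, with each later key breaking ties left by the earlier ones. The actual sorting happens in the Python implementation behind this subcommand; the C fragment below is only a sketch of the comparator-chaining idea, and the row layout, field list, and key table are assumptions made for the example.

#include <stdlib.h>
#include <string.h>

/* Hypothetical record for one "zfs userspace" output row. */
typedef struct us_row {
	const char	*type;		/* posixuser, smbgroup, ... */
	const char	*name;
	unsigned long long used;
	unsigned long long quota;
} us_row_t;

enum us_field { F_TYPE, F_NAME, F_USED, F_QUOTA };

typedef struct sort_key {
	enum us_field	field;
	int		reverse;	/* set for -S, clear for -s */
} sort_key_t;

/* Keys in the order the -s/-S flags were given; default is -s type -s name. */
static sort_key_t sort_keys[] = { { F_TYPE, 0 }, { F_NAME, 0 } };
static int nsort_keys = 2;

static int
row_cmp(const void *a, const void *b)
{
	const us_row_t *ra = a, *rb = b;
	int i, c;

	for (i = 0; i < nsort_keys; i++) {
		switch (sort_keys[i].field) {
		case F_TYPE:
			c = strcmp(ra->type, rb->type);
			break;
		case F_NAME:
			c = strcmp(ra->name, rb->name);
			break;
		case F_USED:
			c = (ra->used > rb->used) - (ra->used < rb->used);
			break;
		default:
			c = (ra->quota > rb->quota) - (ra->quota < rb->quota);
			break;
		}
		if (sort_keys[i].reverse)
			c = -c;
		if (c != 0)
			return (c);	/* earlier keys win; later keys only break ties */
	}
	return (0);
}

/* Usage: qsort(rows, nrows, sizeof (us_row_t), row_cmp); */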
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-t\fR \fItype\fR[,...]\fR
+.ad
+.sp .6
+.RS 4n
+Print only the specified types, from the following set:
+.sp
+.in +2
+.nf
+all,posixuser,smbuser,posixgroup,smbgroup
+.fi
+.in -2
+.sp
+
+The default is:
+.sp
+.in +2
+.nf
+-t posixuser,smbuser
+.fi
+.in -2
+.sp
+
+\&...but can be changed to include group types.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-i\fR\fR
+.ad
+.sp .6
+.RS 4n
+Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before an SMB-to-POSIX name mapping was established. In such a case, some files are owned by the SMB entity and some by the POSIX entity, yet the \fB-i\fR option will report that the POSIX entity has the total usage and quota for both.
+.RE
+
+.RE
+
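The -H and -p flags exist for exactly this kind of consumption: no headers, tab-delimited columns, exact byte counts. A hedged sketch of a caller summing per-user usage follows; the popen(3C) invocation, the explicit -o column order, and the sum_user_space name are all assumptions of the example, and a real tool would have to quote or validate the dataset name before building the command line.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Hypothetical consumer of "zfs userspace -Hp -o type,name,used <fs>".
 * With -H the columns are tab-delimited and the header is omitted; with
 * -p the used column is an exact byte count.  The dataset name is pasted
 * into a shell command unquoted here, which a real tool must not do.
 */
static int
sum_user_space(const char *fs, unsigned long long *total)
{
	char cmd[256], line[512];
	FILE *fp;

	(void) snprintf(cmd, sizeof (cmd),
	    "zfs userspace -Hp -o type,name,used %s", fs);
	if ((fp = popen(cmd, "r")) == NULL)
		return (-1);

	*total = 0;
	while (fgets(line, sizeof (line), fp) != NULL) {
		char *type = strtok(line, "\t");
		char *name = strtok(NULL, "\t");
		char *used = strtok(NULL, "\t\n");

		if (type == NULL || name == NULL || used == NULL)
			continue;
		*total += strtoull(used, NULL, 10);
	}
	return (pclose(fp) == 0 ? 0 : -1);
}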
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs groupspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR]... [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR | \fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are:
+.sp
+.in +2
+.nf
+-t posixgroup,smbgroup
+.fi
+.in -2
+.sp
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBzfs mount\fR\fR
.ad
.sp .6
@@ -1597,7 +1903,7 @@ Mounts \fBZFS\fR file systems. Invoked automatically as part of the boot process
.ad
.sp .6
.RS 4n
-An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
+An optional, comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
.RE
.sp
@@ -1718,7 +2024,7 @@ Share all available \fBZFS\fR file systems. Invoked automatically as part of the
.ad
.sp .6
.RS 4n
-Share the specified filesystem according to the "sharenfs" and "sharesmb" properties. File systems are shared when the "sharenfs" or "sharesmb" property is set.
+Share the specified filesystem according to the \fBsharenfs\fR and \fBsharesmb\fR properties. File systems are shared when the \fBsharenfs\fR or \fBsharesmb\fR property is set.
.RE
.RE
@@ -1773,9 +2079,9 @@ Creates a stream representation of the second \fIsnapshot\fR, which is written t
.ad
.sp .6
.RS 4n
-Generate an incremental stream from the first \fIsnapshot\fR to the second \fIsnapshot\fR. The incremental source (the first \fIsnapshot\fR) can be specified as the last component of the snapshot name (for example, the part after the "@"), and it is assumed to be from the same file system as the second \fIsnapshot\fR.
+Generate an incremental stream from the first \fIsnapshot\fR to the second \fIsnapshot\fR. The incremental source (the first \fIsnapshot\fR) can be specified as the last component of the snapshot name (for example, the part after the \fB@\fR), and it is assumed to be from the same file system as the second \fIsnapshot\fR.
.sp
-If the destination is a clone, the source may be the origin snapshot, which must be fully specified (for example, "pool/fs@origin", not just "@origin").
+If the destination is a clone, the source may be the origin snapshot, which must be fully specified (for example, \fBpool/fs@origin\fR, not just \fB@origin\fR).
.RE
.sp
@@ -1786,7 +2092,7 @@ If the destination is a clone, the source may be the origin snapshot, which must
.ad
.sp .6
.RS 4n
-Generate a stream package that sends all intermediary snapshots from the first snapshot to the second snapshot. For example, "\fB-I @a fs@d\fR" is similar to "\fB-i @a fs@b; -i @b fs@c; -i @c fs@d\fR". The incremental source snapshot may be specified as with the \fB-i\fR option.
+Generate a stream package that sends all intermediary snapshots from the first snapshot to the second snapshot. For example, \fB-I @a fs@d\fR is similar to \fB-i @a fs@b; -i @b fs@c; -i @c fs@d\fR. The incremental source snapshot may be specified as with the \fB-i\fR option.
.RE
.sp
@@ -1797,9 +2103,9 @@ Generate a stream package that sends all intermediary snapshots from the first s
.ad
.sp .6
.RS 4n
-Generate a replication stream package, which will replicate the specified filesystem, and all descendant file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones are preserved.
+Generate a replication stream package, which will replicate the specified filesystem, and all descendent file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones are preserved.
.sp
-If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag, an incremental replication stream is generated. The current values of properties, and current snapshot and file system names are set when the stream is received. If the \fB-F\fR flag is specified when this stream is recieved, snapshots and file systems that do not exist on the sending side are destroyed.
+If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag, an incremental replication stream is generated. The current values of properties, and current snapshot and file system names are set when the stream is received. If the \fB-F\fR flag is specified when this stream is received, snapshots and file systems that do not exist on the sending side are destroyed.
.RE
.sp
@@ -1820,17 +2126,17 @@ The format of the stream is evolving. No backwards compatibility is guaranteed.
.ne 2
.mk
.na
-\fB\fBzfs receive\fR [\fB-vnF\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+\fB\fBzfs receive\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
.ad
.br
.na
-\fB\fBzfs receive\fR [\fB-vnF\fR] \fB-d\fR \fIfilesystem\fR\fR
+\fB\fBzfs receive\fR [\fB-vnFu\fR] \fB-d\fR \fIfilesystem\fR\fR
.ad
.sp .6
.RS 4n
-Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the "\fBzfs send\fR" subcommand, which by default creates a full stream. "\fBzfs recv\fR" can be used as an alias for "\fBzfs receive\fR".
+Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the \fBzfs send\fR subcommand, which by default creates a full stream. \fBzfs recv\fR can be used as an alias for \fBzfs receive\fR.
.sp
-If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and re-created, which means the \fBzvol\fR cannot be accessed during the \fBreceive\fR operation.
+If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and recreated, which means the \fBzvol\fR cannot be accessed during the \fBreceive\fR operation.
.sp
The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option.
.sp
@@ -1850,6 +2156,17 @@ Use the name of the sent snapshot to determine the name of the new snapshot as d
.ne 2
.mk
.na
+\fB\fB-u\fR\fR
+.ad
+.sp .6
+.RS 4n
+The file system associated with the received stream is not mounted.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fB-v\fR\fR
.ad
.sp .6
@@ -1876,7 +2193,7 @@ Do not actually receive the stream. This can be useful in conjunction with the \
.ad
.sp .6
.RS 4n
-Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by "z\fBfs send -R -[iI]\fR"), destroy snapshots and file systems that do not exist on the sending side.
+Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side.
.RE
.RE
@@ -1885,11 +2202,22 @@ Force a rollback of the file system to the most recent snapshot before performin
.ne 2
.mk
.na
+\fB\fBzfs allow\fR \fIfilesystem\fR | \fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays permissions that have been delegated on the specified filesystem or volume. See the other forms of \fBzfs allow\fR for more information.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBzfs allow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR| \fIvolume\fR\fR
.ad
.br
.na
-\fB\fBzfs allow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+\fB\fBzfs allow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR | \fIvolume\fR\fR
.ad
.sp .6
.RS 4n
@@ -1961,24 +2289,38 @@ aclinherit property
aclmode property
atime property
canmount property
+casesensitivity property
checksum property
compression property
copies property
devices property
exec property
+groupquota other Allows accessing any groupquota@... property.
+groupused other Allows reading any groupused@... property.
mountpoint property
+nbmand property
+normalization property
primarycache property
quota property
readonly property
recordsize property
+refquota property
+refreservation property
reservation property
secondarycache property
setuid property
shareiscsi property
sharenfs property
+sharesmb property
snapdir property
+utf8only property
+userprop other Allows changing any user property.
+userquota other Allows accessing any userquota@... property.
+userused other Allows reading any userused@... property.
version property
+volblocksize property
volsize property
+vscan property
xattr property
zoned property
userprop other Allows changing any user property.
@@ -2005,7 +2347,7 @@ Sets "create time" permissions. These permissions are granted (locally) to the c
.ad
.sp .6
.RS 4n
-Defines or adds permissions to a permission set. The set can be used by other \fBzfs allow\fR commands for the specified file system and its descendents. Sets are evaluated dynamically, so changes to a set are immediately reflected. Permission sets follow the same naming restrictions as ZFS file systems, but the name must begin with an "at sign" ("@"), and can be no more than 64 characters long.
+Defines or adds permissions to a permission set. The set can be used by other \fBzfs allow\fR commands for the specified file system and its descendents. Sets are evaluated dynamically, so changes to a set are immediately reflected. Permission sets follow the same naming restrictions as ZFS file systems, but the name must begin with an "at sign" (\fB@\fR), and can be no more than 64 characters long.
.RE
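The naming rule for permission sets is small enough to check up front: a leading at sign and at most 64 characters, with the rest of the name following the usual dataset component rules. The validator below is a hypothetical sketch; the permitted character class is an assumption made here, and the authoritative checks live in zfs_namecheck.c rather than in anything shaped like this.

#include <ctype.h>
#include <string.h>

/*
 * Hypothetical check for a delegation set name such as "@pset".
 * Assumes the body may use the same characters as dataset name
 * components (alphanumerics plus '_', '-', '.', ':').
 */
static int
valid_setname(const char *name)
{
	size_t i, len = strlen(name);

	if (len < 2 || len > 64 || name[0] != '@')
		return (0);
	for (i = 1; i < len; i++) {
		char c = name[i];

		if (!isalnum((unsigned char)c) && c != '_' &&
		    c != '-' && c != '.' && c != ':')
			return (0);
	}
	return (1);
}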
.sp
@@ -2028,7 +2370,7 @@ Defines or adds permissions to a permission set. The set can be used by other \f
.ad
.sp .6
.RS 4n
-Removes permissions that were granted with the "\fBzfs allow\fR" command. No permissions are explicitly denied, so other permissions granted are still in effect. For example, if the permission is granted by an ancestor. If no permissions are specified, then all permissions for the specified \fIuser\fR, \fIgroup\fR, or \fIeveryone\fR are removed. Specifying "everyone" (or using the \fB-e\fR option) only removes the permissions that were granted to "everyone", not all permissions for every user and group. See the "\fBzfs allow\fR" command for a description of the \fB-ldugec\fR options.
+Removes permissions that were granted with the \fBzfs allow\fR command. No permissions are explicitly denied, so other permissions granted are still in effect (for example, a permission granted by an ancestor). If no permissions are specified, then all permissions for the specified \fIuser\fR, \fIgroup\fR, or \fIeveryone\fR are removed. Specifying "everyone" (or using the \fB-e\fR option) only removes the permissions that were granted to "everyone", not all permissions for every user and group. See the \fBzfs allow\fR command for a description of the \fB-ldugec\fR options.
.sp
.ne 2
.mk
@@ -2062,14 +2404,14 @@ Removes permissions from a permission set. If no permissions are specified, then
\fBExample 1 \fRCreating a ZFS File System Hierarchy
.sp
.LP
-The following commands create a file system named "\fBpool/home\fR" and a file system named "\fBpool/home/bob\fR". The mount point "\fB/export/home\fR" is set for the parent file system, and automatically inherited by the child file system.
+The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and automatically inherited by the child file system.
.sp
.in +2
.nf
-# zfs create pool/home
-# zfs set mountpoint=/export/home pool/home
-# zfs create pool/home/bob
+# \fBzfs create pool/home\fR
+# \fBzfs set mountpoint=/export/home pool/home\fR
+# \fBzfs create pool/home/bob\fR
.fi
.in -2
.sp
@@ -2078,27 +2420,27 @@ The following commands create a file system named "\fBpool/home\fR" and a file s
\fBExample 2 \fRCreating a ZFS Snapshot
.sp
.LP
-The following command creates a snapshot named "yesterday". This snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of the "\fBpool/home/bob\fR" file system.
+The following command creates a snapshot named \fByesterday\fR. This snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of the \fBpool/home/bob\fR file system.
.sp
.in +2
.nf
-# zfs snapshot pool/home/bob@yesterday
+# \fBzfs snapshot pool/home/bob@yesterday\fR
.fi
.in -2
.sp
.LP
-\fBExample 3 \fRTaking and destroying multiple snapshots
+\fBExample 3 \fRTaking and Destroying Multiple Snapshots
.sp
.LP
-The following command creates snapshots named "\fByesterday\fR" of "\fBpool/home\fR" and all of its descendent file systems. Each snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of its file system. The second command destroys the newly created snapshots.
+The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR and all of its descendent file systems. Each snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of its file system. The second command destroys the newly created snapshots.
.sp
.in +2
.nf
# \fBzfs snapshot -r pool/home@yesterday\fR
-\fB# zfs destroy -r pool/home@yesterday\fR
+# \fBzfs destroy -r pool/home@yesterday\fR
.fi
.in -2
.sp
@@ -2107,13 +2449,13 @@ The following command creates snapshots named "\fByesterday\fR" of "\fBpool/home
\fBExample 4 \fRTurning Off Compression
.sp
.LP
-The following commands turn compression off for all file systems under "\fBpool/home\fR", but explicitly turns it on for "\fBpool/home/anne\fR".
+The following commands turn compression off for all file systems under \fBpool/home\fR, but explicitly turns it on for \fBpool/home/anne\fR.
.sp
.in +2
.nf
-\fB# zfs set compression=off pool/home
-# zfs set compression=on pool/home/anne\fR
+# \fBzfs set compression=off pool/home\fR
+# \fBzfs set compression=on pool/home/anne\fR
.fi
.in -2
.sp
@@ -2122,12 +2464,12 @@ The following commands turn compression off for all file systems under "\fBpool/
\fBExample 5 \fRListing ZFS Datasets
.sp
.LP
-The following command lists all active file systems and volumes in the system. Snapshots are displayed if the "listsnaps" property is "on" (the default is "off") . See \fBzpool\fR(1M) for more information on pool properties.
+The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR). See \fBzpool\fR(1M) for more information on pool properties.
.sp
.in +2
.nf
-\fB# zfs list\fR
+# \fBzfs list\fR
NAME USED AVAIL REFER MOUNTPOINT
@@ -2143,12 +2485,12 @@ The following command lists all active file systems and volumes in the system. S
\fBExample 6 \fRSetting a Quota on a ZFS File System
.sp
.LP
-The following command sets a quota of 50 gbytes for "\fBpool/home/bob\fR".
+The following command sets a quota of 50 Gbytes for \fBpool/home/bob\fR.
.sp
.in +2
.nf
-\fB# zfs set quota=50G pool/home/bob\fR
+# \fBzfs set quota=50G pool/home/bob\fR
.fi
.in -2
.sp
@@ -2157,12 +2499,12 @@ The following command sets a quota of 50 gbytes for "\fBpool/home/bob\fR".
\fBExample 7 \fRListing ZFS Properties
.sp
.LP
-The following command lists all properties for "\fBpool/home/bob\fR".
+The following command lists all properties for \fBpool/home/bob\fR.
.sp
.in +2
.nf
-\fB# zfs get all pool/home/bob\fR
+# \fBzfs get all pool/home/bob\fR
NAME PROPERTY VALUE SOURCE
@@ -2222,7 +2564,7 @@ The following command gets a single property value.
.sp
.in +2
.nf
-\fB# zfs get -H -o value compression pool/home/bob\fR
+# \fBzfs get -H -o value compression pool/home/bob\fR
on
.fi
.in -2
@@ -2230,12 +2572,12 @@ on
.sp
.LP
-The following command lists all properties with local settings for "\fBpool/home/bob\fR".
+The following command lists all properties with local settings for \fBpool/home/bob\fR.
.sp
.in +2
.nf
-\fB# zfs get -r -s local -o name,property,value all pool/home/bob\fR
+# \fBzfs get -r -s local -o name,property,value all pool/home/bob\fR
NAME PROPERTY VALUE
pool compression on
@@ -2248,12 +2590,12 @@ The following command lists all properties with local settings for "\fBpool/home
\fBExample 8 \fRRolling Back a ZFS File System
.sp
.LP
-The following command reverts the contents of "\fBpool/home/anne\fR" to the snapshot named "\fByesterday\fR", deleting all intermediate snapshots.
+The following command reverts the contents of \fBpool/home/anne\fR to the snapshot named \fByesterday\fR, deleting all intermediate snapshots.
.sp
.in +2
.nf
-\fB# zfs rollback -r pool/home/anne@yesterday\fR
+# \fBzfs rollback -r pool/home/anne@yesterday\fR
.fi
.in -2
.sp
@@ -2262,12 +2604,12 @@ The following command reverts the contents of "\fBpool/home/anne\fR" to the snap
\fBExample 9 \fRCreating a ZFS Clone
.sp
.LP
-The following command creates a writable file system whose initial contents are the same as "\fBpool/home/bob@yesterday\fR".
+The following command creates a writable file system whose initial contents are the same as \fBpool/home/bob@yesterday\fR.
.sp
.in +2
.nf
-\fB# zfs clone pool/home/bob@yesterday pool/clone\fR
+# \fBzfs clone pool/home/bob@yesterday pool/clone\fR
.fi
.in -2
.sp
@@ -2281,17 +2623,16 @@ The following commands illustrate how to test out changes to a file system, and
.sp
.in +2
.nf
-\fB# zfs create pool/project/production\fR
+# \fBzfs create pool/project/production\fR
populate /pool/project/production with data
-\fB# zfs snapshot pool/project/production@today
-# zfs clone pool/project/production@today pool/project/beta\fR
- make changes to /pool/project/beta and test them
-\fB# zfs promote pool/project/beta
-# zfs rename pool/project/production pool/project/legacy
-# zfs rename pool/project/beta pool/project/production\fR
- once the legacy version is no longer needed, it can be
- destroyed
-\fB# zfs destroy pool/project/legacy\fR
+# \fBzfs snapshot pool/project/production@today\fR
+# \fBzfs clone pool/project/production@today pool/project/beta\fR
+make changes to /pool/project/beta and test them
+# \fBzfs promote pool/project/beta\fR
+# \fBzfs rename pool/project/production pool/project/legacy\fR
+# \fBzfs rename pool/project/beta pool/project/production\fR
+once the legacy version is no longer needed, it can be destroyed
+# \fBzfs destroy pool/project/legacy\fR
.fi
.in -2
.sp
@@ -2300,12 +2641,12 @@ The following commands illustrate how to test out changes to a file system, and
\fBExample 11 \fRInheriting ZFS Properties
.sp
.LP
-The following command causes "\fBpool/home/bob\fR" and "\fBpool/home/anne\fR" to inherit the "checksum" property from their parent.
+The following command causes \fBpool/home/bob\fR and \fBpool/home/anne\fR to inherit the \fBchecksum\fR property from their parent.
.sp
.in +2
.nf
-\fB# zfs inherit checksum pool/home/bob pool/home/anne\fR
+# \fBzfs inherit checksum pool/home/bob pool/home/anne\fR
.fi
.in -2
.sp
@@ -2314,29 +2655,29 @@ The following command causes "\fBpool/home/bob\fR" and "\fBpool/home/anne\fR" to
\fBExample 12 \fRRemotely Replicating ZFS Data
.sp
.LP
-The following commands send a full stream and then an incremental stream to a remote machine, restoring them into "\fBpoolB/received/fs\fR@a" and "\fBpoolB/received/fs@b\fR", respectively. "\fBpoolB\fR" must contain the file system "\fBpoolB/received\fR", and must not initially contain "\fBpoolB/received/fs\fR".
+The following commands send a full stream and then an incremental stream to a remote machine, restoring them into \fBpoolB/received/fs@a\fR and \fBpoolB/received/fs@b\fR, respectively. \fBpoolB\fR must contain the file system \fBpoolB/received\fR, and must not initially contain \fBpoolB/received/fs\fR.
.sp
.in +2
.nf
-# zfs send pool/fs@a | \e
- ssh host zfs receive poolB/received/fs@a
-# zfs send -i a pool/fs@b | ssh host \e
- zfs receive poolB/received/fs
+# \fBzfs send pool/fs@a | \e\fR
+ \fBssh host zfs receive poolB/received/fs@a\fR
+# \fBzfs send -i a pool/fs@b | ssh host \e\fR
+ \fBzfs receive poolB/received/fs\fR
.fi
.in -2
.sp
.LP
-\fBExample 13 \fRUsing the zfs receive -d Option
+\fBExample 13 \fRUsing the \fBreceive\fR \fB-d\fR Option
.sp
.LP
-The following command sends a full stream of "\fBpoolA/fsA/fsB@snap\fR" to a remote machine, receiving it into "\fBpoolB/received/fsA/fsB@snap\fR". The "\fBfsA/fsB@snap\fR" portion of the received snapshot's name is determined from the name of the sent snapshot. "\fBpoolB\fR" must contain the file system "\fBpoolB/received\fR". If "\fBpoolB/received/fsA\fR" does not exist, it is be created as an empty file system.
+The following command sends a full stream of \fBpoolA/fsA/fsB@snap\fR to a remote machine, receiving it into \fBpoolB/received/fsA/fsB@snap\fR. The \fBfsA/fsB@snap\fR portion of the received snapshot's name is determined from the name of the sent snapshot. \fBpoolB\fR must contain the file system \fBpoolB/received\fR. If \fBpoolB/received/fsA\fR does not exist, it is created as an empty file system.
.sp
.in +2
.nf
-\fB# zfs send poolA/fsA/fsB@snap | \e
+# \fBzfs send poolA/fsA/fsB@snap | \e
ssh host zfs receive -d poolB/received\fR
.fi
.in -2
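The renaming rule is purely textual: drop the sent snapshot's pool component and append the remainder to the target file system. A small illustration follows; receive_name is a made-up helper for the example and glosses over the validation a real implementation would need.

#include <stdio.h>
#include <string.h>

/*
 * Hypothetical illustration of the -d naming rule.
 * receive_name("poolB/received", "poolA/fsA/fsB@snap", ...) yields
 * "poolB/received/fsA/fsB@snap".
 */
static void
receive_name(const char *target, const char *sent, char *buf, size_t len)
{
	const char *tail = strchr(sent, '/');

	if (tail == NULL)		/* snapshot of the pool root dataset */
		tail = strchr(sent, '@');
	(void) snprintf(buf, len, "%s%s", target, tail != NULL ? tail : "");
}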
@@ -2346,18 +2687,18 @@ The following command sends a full stream of "\fBpoolA/fsA/fsB@snap\fR" to a rem
\fBExample 14 \fRSetting User Properties
.sp
.LP
-The following example sets the user defined "com.example:department" property for a dataset.
+The following example sets the user-defined \fBcom.example:department\fR property for a dataset.
.sp
.in +2
.nf
-\fB# zfs set com.example:department=12345 tank/accounting\fR
+# \fBzfs set com.example:department=12345 tank/accounting\fR
.fi
.in -2
.sp
.LP
-\fBExample 15 \fRCreating a ZFS Volume as a iSCSI Target Device
+\fBExample 15 \fRCreating a ZFS Volume as an iSCSI Target Device
.sp
.LP
The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR target.
@@ -2365,10 +2706,10 @@ The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR t
.sp
.in +2
.nf
-\fB# zfs create -V 2g pool/volumes/vol1
- # zfs set shareiscsi=on pool/volumes/vol1
- # iscsitadm list target\fR
- Target: pool/volumes/vol1
+# \fBzfs create -V 2g pool/volumes/vol1\fR
+# \fBzfs set shareiscsi=on pool/volumes/vol1\fR
+# \fBiscsitadm list target\fR
+Target: pool/volumes/vol1
iSCSI Name:
iqn.1986-03.com.sun:02:7b4b02a6-3277-eb1b-e686-a24762c52a8c
Connections: 0
@@ -2378,7 +2719,7 @@ The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR t
.sp
.LP
-After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see the Solaris Administration Guide: Devices and File Systems.
+After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see \fBiscsitadm\fR(1M).
.LP
\fBExample 16 \fRPerforming a Rolling Snapshot
.sp
@@ -2388,27 +2729,29 @@ The following example shows how to maintain a history of snapshots with a consis
.sp
.in +2
.nf
-\fB# zfs destroy -r pool/users@7daysago
-# zfs rename -r pool/users@6daysago @7daysago
-# zfs rename -r pool/users@5daysago @6daysago
-\&...
-# zfs rename -r pool/users@yesterday @2daysago
-# zfs rename -r pool/users@today @yesterday
-# zfs snapshot -r pool/users@today\fR
+# \fBzfs destroy -r pool/users@7daysago\fR
+# \fBzfs rename -r pool/users@6daysago @7daysago\fR
+# \fBzfs rename -r pool/users@5daysago @6daysago\fR
+# \fBzfs rename -r pool/users@4daysago @5daysago\fR
+# \fBzfs rename -r pool/users@3daysago @4daysago\fR
+# \fBzfs rename -r pool/users@2daysago @3daysago\fR
+# \fBzfs rename -r pool/users@yesterday @2daysago\fR
+# \fBzfs rename -r pool/users@today @yesterday\fR
+# \fBzfs snapshot -r pool/users@today\fR
.fi
.in -2
.sp
.LP
-\fBExample 17 \fRSetting sharenfs Property Options on a ZFS File System
+\fBExample 17 \fRSetting \fBsharenfs\fR Property Options on a ZFS File System
.sp
.LP
-The following commands show how to set "sharenfs" property options to enable \fBrw\fR access for a set of \fBIP\fR addresses and to enable root access for system \fBneo\fR on the \fBtank/home\fR file system.
+The following commands show how to set \fBsharenfs\fR property options to enable \fBrw\fR access for a set of \fBIP\fR addresses and to enable root access for system \fBneo\fR on the \fBtank/home\fR file system.
.sp
.in +2
.nf
-\fB# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR
+# \fBzfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR
.fi
.in -2
@@ -2422,13 +2765,13 @@ If you are using \fBDNS\fR for host name resolution, specify the fully qualified
\fBExample 18 \fRDelegating ZFS Administration Permissions on a ZFS Dataset
.sp
.LP
-The following example shows how to set permissions so that user "\fBcindys\fR" can create, destroy, mount and take snapshots on \fBtank/cindys\fR. The permissions on \fBtank/cindys\fR are also displayed.
+The following example shows how to set permissions so that user \fBcindys\fR can create, destroy, mount, and take snapshots on \fBtank/cindys\fR. The permissions on \fBtank/cindys\fR are also displayed.
.sp
.in +2
.nf
-\fB# zfs allow cindys create,destroy,mount,snapshot tank/cindys
-# zfs allow tank/cindys\fR
+# \fBzfs allow cindys create,destroy,mount,snapshot tank/cindys\fR
+# \fBzfs allow tank/cindys\fR
-------------------------------------------------------------
Local+Descendent permissions on (tank/cindys)
user cindys create,destroy,mount,snapshot
@@ -2444,7 +2787,7 @@ Because the \fBtank/cindys\fR mount point permission is set to 755 by default, u
.sp
.in +2
.nf
-# chmod A+user:cindys:add_subdirectory:allow /tank/cindys
+# \fBchmod A+user:cindys:add_subdirectory:allow /tank/cindys\fR
.fi
.in -2
.sp
@@ -2458,9 +2801,9 @@ The following example shows how to grant anyone in the group \fBstaff\fR to crea
.sp
.in +2
.nf
-\fB# zfs allow staff create,mount tank/users
-# zfs allow -c destroy tank/users
-# zfs allow tank/users\fR
+# \fBzfs allow staff create,mount tank/users\fR
+# \fBzfs allow -c destroy tank/users\fR
+# \fBzfs allow tank/users\fR
-------------------------------------------------------------
Create time permissions on (tank/users)
create,destroy
@@ -2480,9 +2823,9 @@ The following example shows how to define and grant a permission set on the \fBt
.sp
.in +2
.nf
-\fB# zfs allow -s @pset create,destroy,snapshot,mount tank/users
-# zfs allow staff @pset tank/users
-# zfs allow tank/users
+# \fBzfs allow -s @pset create,destroy,snapshot,mount tank/users\fR
+# \fBzfs allow staff @pset tank/users\fR
+# \fBzfs allow tank/users\fR
-------------------------------------------------------------
Permission sets on (tank/users)
@pset create,destroy,mount,snapshot
@@ -2490,7 +2833,7 @@ Create time permissions on (tank/users)
create,destroy
Local+Descendent permissions on (tank/users)
group staff @pset,create,mount
--------------------------------------------------------------\fR
+-------------------------------------------------------------
.fi
.in -2
.sp
@@ -2504,8 +2847,8 @@ The following example shows to grant the ability to set quotas and reservations
.sp
.in +2
.nf
-\fB# zfs allow cindys quota,reservation users/home
-# zfs allow users/home\fR
+# \fBzfs allow cindys quota,reservation users/home\fR
+# \fBzfs allow users/home\fR
-------------------------------------------------------------
Local+Descendent permissions on (users/home)
user cindys quota,reservation
@@ -2527,8 +2870,8 @@ The following example shows how to remove the snapshot permission from the \fBst
.sp
.in +2
.nf
-\fB# zfs unallow staff snapshot tank/users
-# zfs allow tank/users\fR
+# \fBzfs unallow staff snapshot tank/users\fR
+# \fBzfs allow tank/users\fR
-------------------------------------------------------------
Permission sets on (tank/users)
@pset create,destroy,mount,snapshot
@@ -2600,7 +2943,10 @@ Interface StabilityCommitted
.SH SEE ALSO
.sp
.LP
-\fBgzip\fR(1), \fBssh\fR(1), \fBmount\fR(1M), \fBshare\fR(1M), \fBsharemgr\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBfsync\fR(3c), \fBdfstab\fR(4), \fBattributes\fR(5)
+\fBssh\fR(1), \fBiscsitadm\fR(1M), \fBmount\fR(1M), \fBshare\fR(1M), \fBsharemgr\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBwrite\fR(2), \fBfsync\fR(3C), \fBdfstab\fR(4), \fBattributes\fR(5)
+.sp
+.LP
+See the \fBgzip\fR(1) man page, which is not part of the SunOS man page collection.
.sp
.LP
For information about using the \fBZFS\fR web-based management tool and other \fBZFS\fR features, see the \fISolaris ZFS Administration Guide\fR.
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
index a9d3c01bec2d..2241f9c42d55 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
@@ -39,11 +39,13 @@
#include <unistd.h>
#include <fcntl.h>
#include <zone.h>
+#include <grp.h>
+#include <pwd.h>
#include <sys/mntent.h>
#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/stat.h>
-#include <sys/avl.h>
+#include <sys/fs/zfs.h>
#include <libzfs.h>
#include <libuutil.h>
@@ -55,6 +57,7 @@ libzfs_handle_t *g_zfs;
static FILE *mnttab_file;
static char history_str[HIS_MAX_RECORD_LEN];
+const char *pypath = "/usr/lib/zfs/pyzfs.py";
static int zfs_do_clone(int argc, char **argv);
static int zfs_do_create(int argc, char **argv);
@@ -74,8 +77,8 @@ static int zfs_do_unshare(int argc, char **argv);
static int zfs_do_send(int argc, char **argv);
static int zfs_do_receive(int argc, char **argv);
static int zfs_do_promote(int argc, char **argv);
-static int zfs_do_allow(int argc, char **argv);
-static int zfs_do_unallow(int argc, char **argv);
+static int zfs_do_userspace(int argc, char **argv);
+static int zfs_do_python(int argc, char **argv);
static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
@@ -119,7 +122,9 @@ typedef enum {
HELP_UNMOUNT,
HELP_UNSHARE,
HELP_ALLOW,
- HELP_UNALLOW
+ HELP_UNALLOW,
+ HELP_USERSPACE,
+ HELP_GROUPSPACE
} zfs_help_t;
typedef struct zfs_command {
@@ -153,6 +158,8 @@ static zfs_command_t command_table[] = {
{ "get", zfs_do_get, HELP_GET },
{ "inherit", zfs_do_inherit, HELP_INHERIT },
{ "upgrade", zfs_do_upgrade, HELP_UPGRADE },
+ { "userspace", zfs_do_userspace, HELP_USERSPACE },
+ { "groupspace", zfs_do_userspace, HELP_GROUPSPACE },
{ NULL },
{ "mount", zfs_do_mount, HELP_MOUNT },
{ "unmount", zfs_do_unmount, HELP_UNMOUNT },
@@ -162,9 +169,9 @@ static zfs_command_t command_table[] = {
{ "send", zfs_do_send, HELP_SEND },
{ "receive", zfs_do_receive, HELP_RECEIVE },
{ NULL },
- { "allow", zfs_do_allow, HELP_ALLOW },
+ { "allow", zfs_do_python, HELP_ALLOW },
{ NULL },
- { "unallow", zfs_do_unallow, HELP_UNALLOW },
+ { "unallow", zfs_do_python, HELP_UNALLOW },
{ NULL },
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
@@ -260,6 +267,14 @@ get_usage(zfs_help_t idx)
"<filesystem|volume>\n"
"\tunallow [-r] -s @setname [<perm|@setname>[,...]] "
"<filesystem|volume>\n"));
+ case HELP_USERSPACE:
+ return (gettext("\tuserspace [-hniHp] [-o field[,...]] "
+ "[-sS field] ... [-t type[,...]]\n"
+ "\t <filesystem|snapshot>\n"));
+ case HELP_GROUPSPACE:
+ return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
+ "[-sS field] ... [-t type[,...]]\n"
+ "\t <filesystem|snapshot>\n"));
}
abort();
@@ -321,7 +336,6 @@ usage(boolean_t requested)
{
int i;
boolean_t show_properties = B_FALSE;
- boolean_t show_permissions = B_FALSE;
FILE *fp = requested ? stdout : stderr;
if (current_command == NULL) {
@@ -352,13 +366,7 @@ usage(boolean_t requested)
strcmp(current_command->name, "list") == 0))
show_properties = B_TRUE;
- if (current_command != NULL &&
- (strcmp(current_command->name, "allow") == 0 ||
- strcmp(current_command->name, "unallow") == 0))
- show_permissions = B_TRUE;
-
if (show_properties) {
-
(void) fprintf(fp,
gettext("\nThe following properties are supported:\n"));
@@ -369,29 +377,33 @@ usage(boolean_t requested)
(void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE,
ZFS_TYPE_DATASET);
+ (void) fprintf(fp, "\t%-15s ", "userused@...");
+ (void) fprintf(fp, " NO NO <size>\n");
+ (void) fprintf(fp, "\t%-15s ", "groupused@...");
+ (void) fprintf(fp, " NO NO <size>\n");
+ (void) fprintf(fp, "\t%-15s ", "userquota@...");
+ (void) fprintf(fp, "YES NO <size> | none\n");
+ (void) fprintf(fp, "\t%-15s ", "groupquota@...");
+ (void) fprintf(fp, "YES NO <size> | none\n");
+
(void) fprintf(fp, gettext("\nSizes are specified in bytes "
"with standard units such as K, M, G, etc.\n"));
(void) fprintf(fp, gettext("\nUser-defined properties can "
"be specified by using a name containing a colon (:).\n"));
-
- } else if (show_permissions) {
- (void) fprintf(fp,
- gettext("\nThe following permissions are supported:\n"));
-
- zfs_deleg_permissions();
+ (void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ "
+ "properties must be appended with\n"
+ "a user or group specifier of one of these forms:\n"
+ " POSIX name (eg: \"matt\")\n"
+ " POSIX id (eg: \"126829\")\n"
+ " SMB name@domain (eg: \"matt@sun\")\n"
+ " SMB SID (eg: \"S-1-234-567-89\")\n"));
} else {
- /*
- * TRANSLATION NOTE:
- * "zfs set|get" must not be localised this is the
- * command name and arguments.
- */
-
(void) fprintf(fp,
- gettext("\nFor the property list, run: zfs set|get\n"));
-
+ gettext("\nFor the property list, run: %s\n"),
+ "zfs set|get");
(void) fprintf(fp,
- gettext("\nFor the delegated permission list, run:"
- " zfs allow|unallow\n"));
+ gettext("\nFor the delegated permission list, run: %s\n"),
+ "zfs allow|unallow");
}
/*
@@ -429,7 +441,6 @@ parseprop(nvlist_t *props)
return (-1);
}
return (0);
-
}
static int
@@ -1101,6 +1112,17 @@ get_callback(zfs_handle_t *zhp, void *data)
zprop_print_one_property(zfs_get_name(zhp), cbp,
zfs_prop_to_name(pl->pl_prop),
buf, sourcetype, source);
+ } else if (zfs_prop_userquota(pl->pl_user_prop)) {
+ sourcetype = ZPROP_SRC_LOCAL;
+
+ if (zfs_prop_get_userquota(zhp, pl->pl_user_prop,
+ buf, sizeof (buf), cbp->cb_literal) != 0) {
+ sourcetype = ZPROP_SRC_NONE;
+ (void) strlcpy(buf, "-", sizeof (buf));
+ }
+
+ zprop_print_one_property(zfs_get_name(zhp), cbp,
+ pl->pl_user_prop, buf, sourcetype, source);
} else {
if (nvlist_lookup_nvlist(userprop,
pl->pl_user_prop, &propval) != 0) {
@@ -1477,21 +1499,30 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)
{
upgrade_cbdata_t *cb = data;
int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
-
- if (cb->cb_version >= ZPL_VERSION_FUID) {
- int spa_version;
-
- if (zfs_spa_version(zhp, &spa_version) < 0)
- return (-1);
-
- if (spa_version < SPA_VERSION_FUID) {
- /* can't upgrade */
- (void) printf(gettext("%s: can not be upgraded; "
- "the pool version needs to first be upgraded\nto "
- "version %d\n\n"),
- zfs_get_name(zhp), SPA_VERSION_FUID);
- cb->cb_numfailed++;
- return (0);
+ int i;
+ static struct { int zplver; int spaver; } table[] = {
+ {ZPL_VERSION_FUID, SPA_VERSION_FUID},
+ {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
+ {0, 0}
+ };
+
+
+ for (i = 0; table[i].zplver; i++) {
+ if (cb->cb_version >= table[i].zplver) {
+ int spa_version;
+
+ if (zfs_spa_version(zhp, &spa_version) < 0)
+ return (-1);
+
+ if (spa_version < table[i].spaver) {
+ /* can't upgrade */
+ (void) printf(gettext("%s: can not be "
+ "upgraded; the pool version needs to first "
+ "be upgraded\nto version %d\n\n"),
+ zfs_get_name(zhp), table[i].spaver);
+ cb->cb_numfailed++;
+ return (0);
+ }
}
}
@@ -1592,6 +1623,8 @@ zfs_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 2 Enhanced directory entries\n"));
(void) printf(gettext(" 3 Case insensitive and File system "
"unique identifer (FUID)\n"));
+ (void) printf(gettext(" 4 userquota, groupquota "
+ "properties\n"));
(void) printf(gettext("\nFor more information on a particular "
"version, including supported releases, see:\n\n"));
(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -1640,6 +1673,84 @@ zfs_do_upgrade(int argc, char **argv)
}
/*
+ * zfs userspace
+ */
+static int
+userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
+{
+ zfs_userquota_prop_t *typep = arg;
+ zfs_userquota_prop_t p = *typep;
+ char *name = NULL;
+ char *ug, *propname;
+ char namebuf[32];
+ char sizebuf[32];
+
+ if (domain == NULL || domain[0] == '\0') {
+ if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) {
+ struct group *g = getgrgid(rid);
+ if (g)
+ name = g->gr_name;
+ } else {
+ struct passwd *p = getpwuid(rid);
+ if (p)
+ name = p->pw_name;
+ }
+ }
+
+ if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA)
+ ug = "group";
+ else
+ ug = "user";
+
+ if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED)
+ propname = "used";
+ else
+ propname = "quota";
+
+ if (name == NULL) {
+ (void) snprintf(namebuf, sizeof (namebuf),
+ "%llu", (longlong_t)rid);
+ name = namebuf;
+ }
+ zfs_nicenum(space, sizebuf, sizeof (sizebuf));
+
+ (void) printf("%s %s %s%c%s %s\n", propname, ug, domain,
+ domain[0] ? '-' : ' ', name, sizebuf);
+
+ return (0);
+}
+
+static int
+zfs_do_userspace(int argc, char **argv)
+{
+ zfs_handle_t *zhp;
+ zfs_userquota_prop_t p;
+ int error;
+
+ /*
+ * Try the python version. If the execv fails, we'll continue
+ * and do a simplistic implementation.
+ */
+ (void) execv(pypath, argv-1);
+
+ (void) printf("internal error: %s not found\n"
+ "falling back on built-in implementation, "
+ "some features will not work\n", pypath);
+
+ if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL)
+ return (1);
+
+ (void) printf("PROP TYPE NAME VALUE\n");
+
+ for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) {
+ error = zfs_userspace(zhp, p, userspace_cb, &p);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
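The argv-1 handed to execv() depends on how main() dispatches subcommands: each handler is called with argc - 1 and argv + 1, so backing up one slot recovers the caller's original vector, program name and subcommand included, and the helper script sees the full command line unchanged. A minimal standalone sketch of the exec-or-fall-back pattern follows; the helper path and the builtin_fallback routine are placeholders, not the real pyzfs interface.

#include <stdio.h>
#include <unistd.h>

/* Placeholder for a reduced-functionality built-in code path. */
static int
builtin_fallback(int argc, char **argv)
{
	(void) argc;
	(void) argv;
	return (1);
}

/*
 * Sketch of the dispatch pattern.  argv here is the subcommand's view
 * (argv[0] is the subcommand name), exactly what a zfs_do_* handler
 * receives, so argv - 1 points back at the vector main() was given and
 * can be passed to the external helper whole.  execv() only returns on
 * failure, in which case the built-in path runs instead.
 */
static int
run_helper_or_fallback(const char *helper, int argc, char **argv)
{
	(void) execv(helper, argv - 1);
	(void) fprintf(stderr, "internal error: %s not found\n", helper);
	return (builtin_fallback(argc, argv));
}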
+/*
* list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...]
* [-s property [-s property]...] [-S property [-S property]...]
* <dataset> ...
@@ -1728,7 +1839,6 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
first = B_FALSE;
}
- right_justify = B_FALSE;
if (pl->pl_prop != ZPROP_INVAL) {
if (zfs_prop_get(zhp, pl->pl_prop, property,
sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
@@ -1737,6 +1847,13 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
propstr = property;
right_justify = zfs_prop_align_right(pl->pl_prop);
+ } else if (zfs_prop_userquota(pl->pl_user_prop)) {
+ if (zfs_prop_get_userquota(zhp, pl->pl_user_prop,
+ property, sizeof (property), B_FALSE) != 0)
+ propstr = "-";
+ else
+ propstr = property;
+ right_justify = B_TRUE;
} else {
if (nvlist_lookup_nvlist(userprops,
pl->pl_user_prop, &propval) != 0)
@@ -1744,6 +1861,7 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
else
verify(nvlist_lookup_string(propval,
ZPROP_VALUE, &propstr) == 0);
+ right_justify = B_FALSE;
}
width = pl->pl_width;
@@ -2281,7 +2399,7 @@ zfs_do_set(int argc, char **argv)
usage(B_FALSE);
}
- ret = zfs_for_each(argc - 2, argv + 2, NULL,
+ ret = zfs_for_each(argc - 2, argv + 2, 0,
ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, &cb);
return (ret);
@@ -2542,388 +2660,6 @@ zfs_do_receive(int argc, char **argv)
return (err != 0);
}
-typedef struct allow_cb {
- int a_permcnt;
- size_t a_treeoffset;
-} allow_cb_t;
-
-static void
-zfs_print_perms(avl_tree_t *tree)
-{
- zfs_perm_node_t *permnode;
-
- permnode = avl_first(tree);
- while (permnode != NULL) {
- (void) printf("%s", permnode->z_pname);
- permnode = AVL_NEXT(tree, permnode);
- if (permnode)
- (void) printf(",");
- else
- (void) printf("\n");
- }
-}
-
-/*
- * Iterate over user/groups/everyone/... and the call perm_iter
- * function to print actual permission when tree has >0 nodes.
- */
-static void
-zfs_iter_perms(avl_tree_t *tree, const char *banner, allow_cb_t *cb)
-{
- zfs_allow_node_t *item;
- avl_tree_t *ptree;
-
- item = avl_first(tree);
- while (item) {
- ptree = (void *)((char *)item + cb->a_treeoffset);
- if (avl_numnodes(ptree)) {
- if (cb->a_permcnt++ == 0)
- (void) printf("%s\n", banner);
- (void) printf("\t%s", item->z_key);
- /*
- * Avoid an extra space being printed
- * for "everyone" which is keyed with a null
- * string
- */
- if (item->z_key[0] != '\0')
- (void) printf(" ");
- zfs_print_perms(ptree);
- }
- item = AVL_NEXT(tree, item);
- }
-}
-
-#define LINES "-------------------------------------------------------------\n"
-static int
-zfs_print_allows(char *ds)
-{
- zfs_allow_t *curperms, *perms;
- zfs_handle_t *zhp;
- allow_cb_t allowcb = { 0 };
- char banner[MAXPATHLEN];
-
- if (ds[0] == '-')
- usage(B_FALSE);
-
- if (strrchr(ds, '@')) {
- (void) fprintf(stderr, gettext("Snapshots don't have 'allow'"
- " permissions\n"));
- return (1);
- }
- if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
- return (1);
-
- if (zfs_perm_get(zhp, &perms)) {
- (void) fprintf(stderr,
- gettext("Failed to retrieve 'allows' on %s\n"), ds);
- zfs_close(zhp);
- return (1);
- }
-
- zfs_close(zhp);
-
- if (perms != NULL)
- (void) printf("%s", LINES);
- for (curperms = perms; curperms; curperms = curperms->z_next) {
-
- (void) snprintf(banner, sizeof (banner),
- "Permission sets on (%s)", curperms->z_setpoint);
- allowcb.a_treeoffset =
- offsetof(zfs_allow_node_t, z_localdescend);
- allowcb.a_permcnt = 0;
- zfs_iter_perms(&curperms->z_sets, banner, &allowcb);
-
- (void) snprintf(banner, sizeof (banner),
- "Create time permissions on (%s)", curperms->z_setpoint);
- allowcb.a_treeoffset =
- offsetof(zfs_allow_node_t, z_localdescend);
- allowcb.a_permcnt = 0;
- zfs_iter_perms(&curperms->z_crperms, banner, &allowcb);
-
-
- (void) snprintf(banner, sizeof (banner),
- "Local permissions on (%s)", curperms->z_setpoint);
- allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_local);
- allowcb.a_permcnt = 0;
- zfs_iter_perms(&curperms->z_user, banner, &allowcb);
- zfs_iter_perms(&curperms->z_group, banner, &allowcb);
- zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
-
- (void) snprintf(banner, sizeof (banner),
- "Descendent permissions on (%s)", curperms->z_setpoint);
- allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_descend);
- allowcb.a_permcnt = 0;
- zfs_iter_perms(&curperms->z_user, banner, &allowcb);
- zfs_iter_perms(&curperms->z_group, banner, &allowcb);
- zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
-
- (void) snprintf(banner, sizeof (banner),
- "Local+Descendent permissions on (%s)",
- curperms->z_setpoint);
- allowcb.a_treeoffset =
- offsetof(zfs_allow_node_t, z_localdescend);
- allowcb.a_permcnt = 0;
- zfs_iter_perms(&curperms->z_user, banner, &allowcb);
- zfs_iter_perms(&curperms->z_group, banner, &allowcb);
- zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
-
- (void) printf("%s", LINES);
- }
- zfs_free_allows(perms);
- return (0);
-}
-
-#define ALLOWOPTIONS "ldcsu:g:e"
-#define UNALLOWOPTIONS "ldcsu:g:er"
-
-/*
- * Validate options, and build necessary datastructure to display/remove/add
- * permissions.
- * Returns 0 - If permissions should be added/removed
- * Returns 1 - If permissions should be displayed.
- * Returns -1 - on failure
- */
-int
-parse_allow_args(int *argc, char **argv[], boolean_t unallow,
- char **ds, int *recurse, nvlist_t **zperms)
-{
- int c;
- char *options = unallow ? UNALLOWOPTIONS : ALLOWOPTIONS;
- zfs_deleg_inherit_t deleg_type = ZFS_DELEG_NONE;
- zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN;
- char *who = NULL;
- char *perms = NULL;
- zfs_handle_t *zhp;
-
- while ((c = getopt(*argc, *argv, options)) != -1) {
- switch (c) {
- case 'l':
- if (who_type == ZFS_DELEG_CREATE ||
- who_type == ZFS_DELEG_NAMED_SET)
- usage(B_FALSE);
-
- deleg_type |= ZFS_DELEG_PERM_LOCAL;
- break;
- case 'd':
- if (who_type == ZFS_DELEG_CREATE ||
- who_type == ZFS_DELEG_NAMED_SET)
- usage(B_FALSE);
-
- deleg_type |= ZFS_DELEG_PERM_DESCENDENT;
- break;
- case 'r':
- *recurse = B_TRUE;
- break;
- case 'c':
- if (who_type != ZFS_DELEG_WHO_UNKNOWN)
- usage(B_FALSE);
- if (deleg_type)
- usage(B_FALSE);
- who_type = ZFS_DELEG_CREATE;
- break;
- case 's':
- if (who_type != ZFS_DELEG_WHO_UNKNOWN)
- usage(B_FALSE);
- if (deleg_type)
- usage(B_FALSE);
- who_type = ZFS_DELEG_NAMED_SET;
- break;
- case 'u':
- if (who_type != ZFS_DELEG_WHO_UNKNOWN)
- usage(B_FALSE);
- who_type = ZFS_DELEG_USER;
- who = optarg;
- break;
- case 'g':
- if (who_type != ZFS_DELEG_WHO_UNKNOWN)
- usage(B_FALSE);
- who_type = ZFS_DELEG_GROUP;
- who = optarg;
- break;
- case 'e':
- if (who_type != ZFS_DELEG_WHO_UNKNOWN)
- usage(B_FALSE);
- who_type = ZFS_DELEG_EVERYONE;
- break;
- default:
- usage(B_FALSE);
- break;
- }
- }
-
- if (deleg_type == 0)
- deleg_type = ZFS_DELEG_PERM_LOCALDESCENDENT;
-
- *argc -= optind;
- *argv += optind;
-
- if (unallow == B_FALSE && *argc == 1) {
- /*
- * Only print permissions if no options were processed
- */
- if (optind == 1)
- return (1);
- else
- usage(B_FALSE);
- }
-
- /*
- * initialize variables for zfs_build_perms based on number
- * of arguments.
- * 3 arguments ==> zfs [un]allow joe perm,perm,perm <dataset> or
- * zfs [un]allow -s @set1 perm,perm <dataset>
- * 2 arguments ==> zfs [un]allow -c perm,perm <dataset> or
- * zfs [un]allow -u|-g <name> perm <dataset> or
- * zfs [un]allow -e perm,perm <dataset>
- * zfs unallow joe <dataset>
- * zfs unallow -s @set1 <dataset>
- * 1 argument ==> zfs [un]allow -e <dataset> or
- * zfs [un]allow -c <dataset>
- */
-
- switch (*argc) {
- case 3:
- perms = (*argv)[1];
- who = (*argv)[0];
- *ds = (*argv)[2];
-
- /*
- * advance argc/argv for do_allow cases.
- * for do_allow case make sure who have a know who type
- * and its not a permission set.
- */
- if (unallow == B_TRUE) {
- *argc -= 2;
- *argv += 2;
- } else if (who_type != ZFS_DELEG_WHO_UNKNOWN &&
- who_type != ZFS_DELEG_NAMED_SET)
- usage(B_FALSE);
- break;
-
- case 2:
- if (unallow == B_TRUE && (who_type == ZFS_DELEG_EVERYONE ||
- who_type == ZFS_DELEG_CREATE || who != NULL)) {
- perms = (*argv)[0];
- *ds = (*argv)[1];
- } else {
- if (unallow == B_FALSE &&
- (who_type == ZFS_DELEG_WHO_UNKNOWN ||
- who_type == ZFS_DELEG_NAMED_SET))
- usage(B_FALSE);
- else if (who_type == ZFS_DELEG_WHO_UNKNOWN ||
- who_type == ZFS_DELEG_NAMED_SET)
- who = (*argv)[0];
- else if (who_type != ZFS_DELEG_NAMED_SET)
- perms = (*argv)[0];
- *ds = (*argv)[1];
- }
- if (unallow == B_TRUE) {
- (*argc)--;
- (*argv)++;
- }
- break;
-
- case 1:
- if (unallow == B_FALSE)
- usage(B_FALSE);
- if (who == NULL && who_type != ZFS_DELEG_CREATE &&
- who_type != ZFS_DELEG_EVERYONE)
- usage(B_FALSE);
- *ds = (*argv)[0];
- break;
-
- default:
- usage(B_FALSE);
- }
-
- if (strrchr(*ds, '@')) {
- (void) fprintf(stderr,
- gettext("Can't set or remove 'allow' permissions "
- "on snapshots.\n"));
- return (-1);
- }
-
- if ((zhp = zfs_open(g_zfs, *ds, ZFS_TYPE_DATASET)) == NULL)
- return (-1);
-
- if ((zfs_build_perms(zhp, who, perms,
- who_type, deleg_type, zperms)) != 0) {
- zfs_close(zhp);
- return (-1);
- }
- zfs_close(zhp);
- return (0);
-}
-
-static int
-zfs_do_allow(int argc, char **argv)
-{
- char *ds;
- nvlist_t *zperms = NULL;
- zfs_handle_t *zhp;
- int unused;
- int ret;
-
- if ((ret = parse_allow_args(&argc, &argv, B_FALSE, &ds,
- &unused, &zperms)) == -1)
- return (1);
-
- if (ret == 1)
- return (zfs_print_allows(argv[0]));
-
- if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
- return (1);
-
- if (zfs_perm_set(zhp, zperms)) {
- zfs_close(zhp);
- nvlist_free(zperms);
- return (1);
- }
- nvlist_free(zperms);
- zfs_close(zhp);
-
- return (0);
-}
-
-static int
-unallow_callback(zfs_handle_t *zhp, void *data)
-{
- nvlist_t *nvp = (nvlist_t *)data;
- int error;
-
- error = zfs_perm_remove(zhp, nvp);
- if (error) {
- (void) fprintf(stderr, gettext("Failed to remove permissions "
- "on %s\n"), zfs_get_name(zhp));
- }
- return (error);
-}
-
-static int
-zfs_do_unallow(int argc, char **argv)
-{
- int recurse = B_FALSE;
- char *ds;
- int error;
- nvlist_t *zperms = NULL;
- int flags = 0;
-
- if (parse_allow_args(&argc, &argv, B_TRUE,
- &ds, &recurse, &zperms) == -1)
- return (1);
-
- if (recurse)
- flags |= ZFS_ITER_RECURSE;
- error = zfs_for_each(argc, argv, flags,
- ZFS_TYPE_FILESYSTEM|ZFS_TYPE_VOLUME, NULL,
- NULL, 0, unallow_callback, (void *)zperms);
-
- if (zperms)
- nvlist_free(zperms);
-
- return (error);
-}
-
typedef struct get_all_cbdata {
zfs_handle_t **cb_handles;
size_t cb_alloc;
@@ -3114,7 +2850,6 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts,
sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0);
- canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
if (op == OP_SHARE && strcmp(shareopts, "off") == 0 &&
strcmp(smbshareopts, "off") == 0) {
@@ -3124,7 +2859,8 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
(void) fprintf(stderr, gettext("cannot share '%s': "
"legacy share\n"), zfs_get_name(zhp));
(void) fprintf(stderr, gettext("use share(1M) to "
- "share this filesystem\n"));
+ "share this filesystem, or set "
+ "sharenfs property on\n"));
return (1);
}
@@ -3162,6 +2898,7 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
* noauto no return 0
* noauto yes pass through
*/
+ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
if (canmount == ZFS_CANMOUNT_OFF) {
if (!explicit)
return (0);
@@ -4055,6 +3792,15 @@ zfs_do_unjail(int argc, char **argv)
return (do_jail(argc, argv, 0));
}
+/* ARGSUSED */
+static int
+zfs_do_python(int argc, char **argv)
+{
+ (void) execv(pypath, argv-1);
+ (void) printf("internal error: %s not found\n", pypath);
+ return (-1);
+}
+
/*
* Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is
* 'legacy'. Otherwise, complain that use should be using 'zfs mount'.
@@ -4312,6 +4058,7 @@ main(int argc, char **argv)
/*
* Run the appropriate command.
*/
+ libzfs_mnttab_cache(g_zfs, B_TRUE);
if (find_command_idx(cmdname, &i) == 0) {
current_command = &command_table[i];
ret = command_table[i].func(argc - 1, argv + 1);
@@ -4324,6 +4071,7 @@ main(int argc, char **argv)
"command '%s'\n"), cmdname);
usage(B_FALSE);
}
+ libzfs_mnttab_cache(g_zfs, B_FALSE);
}
(void) fclose(mnttab_file);
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
index eef60e6dedbf..abfd062d7905 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -378,12 +378,11 @@ add_prop_list(const char *propname, char *propval, nvlist_t **props,
}
normnm = zpool_prop_to_name(prop);
} else {
- if ((fprop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
- (void) fprintf(stderr, gettext("property '%s' is "
- "not a valid file system property\n"), propname);
- return (2);
+ if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
+ normnm = zfs_prop_to_name(fprop);
+ } else {
+ normnm = propname;
}
- normnm = zfs_prop_to_name(fprop);
}
if (nvlist_lookup_string(proplist, normnm, &strval) == 0 &&
@@ -1263,7 +1262,7 @@ show_import(nvlist_t *config)
*/
static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
- int force, nvlist_t *props, boolean_t allowfaulted)
+ int force, nvlist_t *props, boolean_t do_verbatim)
{
zpool_handle_t *zhp;
char *name;
@@ -1316,16 +1315,17 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
}
- if (zpool_import_props(g_zfs, config, newname, props,
- allowfaulted) != 0)
+ if (zpool_import_props(g_zfs, config, newname, props, do_verbatim) != 0)
return (1);
if (newname != NULL)
name = (char *)newname;
- verify((zhp = zpool_open_canfail(g_zfs, name)) != NULL);
+ if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
+ return (1);
- if (zpool_enable_datasets(zhp, mntopts, 0) != 0) {
+ if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
+ zpool_enable_datasets(zhp, mntopts, 0) != 0) {
zpool_close(zhp);
return (1);
}
@@ -1359,7 +1359,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* -F Import even in the presence of faulted vdevs. This is an
* intentionally undocumented option for testing purposes, and
* treats the pool configuration as complete, leaving any bad
- * vdevs in the FAULTED state.
+ * vdevs in the FAULTED state. In other words, it does verbatim
+ * import.
*
* -a Import all pools found.
*
@@ -1388,7 +1389,7 @@ zpool_do_import(int argc, char **argv)
nvlist_t *found_config;
nvlist_t *props = NULL;
boolean_t first;
- boolean_t allow_faulted = B_FALSE;
+ boolean_t do_verbatim = B_FALSE;
uint64_t pool_state;
char *cachefile = NULL;
@@ -1421,7 +1422,7 @@ zpool_do_import(int argc, char **argv)
do_force = B_TRUE;
break;
case 'F':
- allow_faulted = B_TRUE;
+ do_verbatim = B_TRUE;
break;
case 'o':
if ((propval = strchr(optarg, '=')) != NULL) {
@@ -1571,7 +1572,7 @@ zpool_do_import(int argc, char **argv)
if (do_all)
err |= do_import(config, NULL, mntopts,
- do_force, props, allow_faulted);
+ do_force, props, do_verbatim);
else
show_import(config);
} else if (searchname != NULL) {
@@ -1619,7 +1620,7 @@ zpool_do_import(int argc, char **argv)
err = B_TRUE;
} else {
err |= do_import(found_config, argc == 1 ? NULL :
- argv[1], mntopts, do_force, props, allow_faulted);
+ argv[1], mntopts, do_force, props, do_verbatim);
}
}
@@ -2766,7 +2767,7 @@ find_spare(zpool_handle_t *zhp, void *data)
*/
void
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
- int namewidth, int depth, boolean_t isspare, boolean_t print_logs)
+ int namewidth, int depth, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
@@ -2880,13 +2881,14 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
for (c = 0; c < children; c++) {
uint64_t is_log = B_FALSE;
+ /* Don't print logs here */
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
&is_log);
- if ((is_log && !print_logs) || (!is_log && print_logs))
+ if (is_log)
continue;
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
print_status_config(zhp, vname, child[c],
- namewidth, depth + 2, isspare, B_FALSE);
+ namewidth, depth + 2, isspare);
free(vname);
}
}
@@ -2941,7 +2943,7 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
for (i = 0; i < nspares; i++) {
name = zpool_vdev_name(g_zfs, zhp, spares[i]);
print_status_config(zhp, name, spares[i],
- namewidth, 2, B_TRUE, B_FALSE);
+ namewidth, 2, B_TRUE);
free(name);
}
}
@@ -2961,7 +2963,40 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
for (i = 0; i < nl2cache; i++) {
name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
print_status_config(zhp, name, l2cache[i],
- namewidth, 2, B_FALSE, B_FALSE);
+ namewidth, 2, B_FALSE);
+ free(name);
+ }
+}
+
+/*
+ * Print log vdevs.
+ * Logs are recorded as top level vdevs in the main pool child array but with
+ * "is_log" set to 1. We use print_status_config() to print the top level logs
+ * then any log children (eg mirrored slogs) are printed recursively - which
+ * works because only the top level vdev is marked "is_log"
+ */
+static void
+print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth)
+{
+ uint_t c, children;
+ nvlist_t **child;
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
+ &children) != 0)
+ return;
+
+ (void) printf(gettext("\tlogs\n"));
+
+ for (c = 0; c < children; c++) {
+ uint64_t is_log = B_FALSE;
+ char *name;
+
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ &is_log);
+ if (!is_log)
+ continue;
+ name = zpool_vdev_name(g_zfs, zhp, child[c]);
+ print_status_config(zhp, name, child[c], namewidth, 2, B_FALSE);
free(name);
}
}
@@ -3191,11 +3226,10 @@ status_callback(zpool_handle_t *zhp, void *data)
(void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth,
"NAME", "STATE", "READ", "WRITE", "CKSUM");
print_status_config(zhp, zpool_get_name(zhp), nvroot,
- namewidth, 0, B_FALSE, B_FALSE);
- if (num_logs(nvroot) > 0)
- print_status_config(zhp, "logs", nvroot, namewidth, 0,
- B_FALSE, B_TRUE);
+ namewidth, 0, B_FALSE);
+ if (num_logs(nvroot) > 0)
+ print_logs(zhp, nvroot, namewidth);
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
&l2cache, &nl2cache) == 0)
print_l2cache(zhp, l2cache, nl2cache, namewidth);
@@ -3496,8 +3530,8 @@ zpool_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 11 Improved scrub performance\n"));
(void) printf(gettext(" 12 Snapshot properties\n"));
(void) printf(gettext(" 13 snapused property\n"));
- (void) printf(gettext(" 14 passthrough-x aclinherit "
- "support\n"));
+ (void) printf(gettext(" 14 passthrough-x aclinherit\n"));
+ (void) printf(gettext(" 15 user/group space accounting\n"));
(void) printf(gettext("For more information on a particular "
"version, including supported releases, see:\n\n"));
(void) printf("http://www.opensolaris.org/os/community/zfs/"
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index ff55c29c48ac..2e75bc85564f 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -76,6 +76,7 @@
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/txg.h>
+#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/dmu_objset.h>
#include <sys/poll.h>
@@ -165,9 +166,11 @@ typedef void ztest_func_t(ztest_args_t *);
* Note: these aren't static because we want dladdr() to work.
*/
ztest_func_t ztest_dmu_read_write;
+ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
ztest_func_t ztest_zap;
+ztest_func_t ztest_fzap;
ztest_func_t ztest_zap_parallel;
ztest_func_t ztest_traverse;
ztest_func_t ztest_dsl_prop_get_set;
@@ -200,19 +203,21 @@ uint64_t zopt_rarely = 60; /* every 60 seconds */
ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
+ { ztest_dmu_read_write_zcopy, 1, &zopt_always },
{ ztest_dmu_write_parallel, 30, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
+ { ztest_fzap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
{ ztest_dsl_prop_get_set, 1, &zopt_sometimes },
{ ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
- { ztest_dsl_dataset_promote_busy, 1, &zopt_sometimes },
{ ztest_spa_create_destroy, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_sometimes },
{ ztest_spa_rename, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_rarely },
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
+ { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
{ ztest_vdev_add_remove, 1, &zopt_vdevtime },
{ ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
{ ztest_scrub, 1, &zopt_vdevtime },
@@ -247,9 +252,11 @@ static ztest_shared_t *ztest_shared;
static int ztest_random_fd;
static int ztest_dump_core = 1;
+static uint64_t metaslab_sz;
static boolean_t ztest_exiting;
extern uint64_t metaslab_gang_bang;
+extern uint64_t metaslab_df_alloc_threshold;
#define ZTEST_DIROBJ 1
#define ZTEST_MICROZAP_OBJ 2
@@ -424,10 +431,10 @@ ztest_random(uint64_t range)
return (r % range);
}
+/* ARGSUSED */
static void
ztest_record_enospc(char *s)
{
- dprintf("ENOSPC doing: %s\n", s ? s : "<unknown>");
ztest_shared->zs_enospc_count++;
}
@@ -706,15 +713,9 @@ ztest_random_compress(void)
return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
}
-typedef struct ztest_replay {
- objset_t *zr_os;
- uint64_t zr_assign;
-} ztest_replay_t;
-
static int
-ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
+ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap)
{
- objset_t *os = zr->zr_os;
dmu_tx_t *tx;
int error;
@@ -723,7 +724,7 @@ ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, zr->zr_assign);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
@@ -740,16 +741,15 @@ ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
(void) printf("replay create of %s object %llu"
" in txg %llu = %d\n",
osname, (u_longlong_t)lr->lr_doid,
- (u_longlong_t)zr->zr_assign, error);
+ (u_longlong_t)dmu_tx_get_txg(tx), error);
}
return (error);
}
static int
-ztest_replay_remove(ztest_replay_t *zr, lr_remove_t *lr, boolean_t byteswap)
+ztest_replay_remove(objset_t *os, lr_remove_t *lr, boolean_t byteswap)
{
- objset_t *os = zr->zr_os;
dmu_tx_t *tx;
int error;
@@ -758,7 +758,7 @@ ztest_replay_remove(ztest_replay_t *zr, lr_remove_t *lr, boolean_t byteswap)
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, zr->zr_assign);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
@@ -784,6 +784,13 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_TRUNCATE */
NULL, /* TX_SETATTR */
NULL, /* TX_ACL */
+ NULL, /* TX_CREATE_ACL */
+ NULL, /* TX_CREATE_ATTR */
+ NULL, /* TX_CREATE_ACL_ATTR */
+ NULL, /* TX_MKDIR_ACL */
+ NULL, /* TX_MKDIR_ATTR */
+ NULL, /* TX_MKDIR_ACL_ATTR */
+ NULL, /* TX_WRITE2 */
};
/*
@@ -985,7 +992,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
uint64_t leaf, top;
uint64_t ashift = ztest_get_ashift();
- uint64_t oldguid;
+ uint64_t oldguid, pguid;
size_t oldsize, newsize;
char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
int replacing;
@@ -1017,10 +1024,16 @@ ztest_vdev_attach_detach(ztest_args_t *za)
* Locate this vdev.
*/
oldvd = rvd->vdev_child[top];
- if (zopt_mirrors >= 1)
+ if (zopt_mirrors >= 1) {
+ ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
+ ASSERT(oldvd->vdev_children >= zopt_mirrors);
oldvd = oldvd->vdev_child[leaf / zopt_raidz];
- if (zopt_raidz > 1)
+ }
+ if (zopt_raidz > 1) {
+ ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
+ ASSERT(oldvd->vdev_children == zopt_raidz);
oldvd = oldvd->vdev_child[leaf % zopt_raidz];
+ }
/*
* If we're already doing an attach or replace, oldvd may be a
@@ -1028,8 +1041,8 @@ ztest_vdev_attach_detach(ztest_args_t *za)
*/
while (oldvd->vdev_children != 0) {
oldvd_has_siblings = B_TRUE;
- ASSERT(oldvd->vdev_children == 2);
- oldvd = oldvd->vdev_child[ztest_random(2)];
+ ASSERT(oldvd->vdev_children >= 2);
+ oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
}
oldguid = oldvd->vdev_guid;
@@ -1037,16 +1050,17 @@ ztest_vdev_attach_detach(ztest_args_t *za)
oldvd_is_log = oldvd->vdev_top->vdev_islog;
(void) strcpy(oldpath, oldvd->vdev_path);
pvd = oldvd->vdev_parent;
+ pguid = pvd->vdev_guid;
/*
* If oldvd has siblings, then half of the time, detach it.
*/
if (oldvd_has_siblings && ztest_random(2) == 0) {
spa_config_exit(spa, SCL_VDEV, FTAG);
- error = spa_vdev_detach(spa, oldguid, B_FALSE);
- if (error != 0 && error != ENODEV && error != EBUSY)
- fatal(0, "detach (%s) returned %d",
- oldpath, error);
+ error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
+ if (error != 0 && error != ENODEV && error != EBUSY &&
+ error != ENOTSUP)
+ fatal(0, "detach (%s) returned %d", oldpath, error);
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
return;
}
@@ -1146,7 +1160,6 @@ ztest_vdev_attach_detach(ztest_args_t *za)
/*
* Verify that dynamic LUN growth works as expected.
*/
-/* ARGSUSED */
void
ztest_vdev_LUN_growth(ztest_args_t *za)
{
@@ -1286,7 +1299,6 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
zilog_t *zilog;
uint64_t seq;
uint64_t objects;
- ztest_replay_t zr;
(void) rw_rdlock(&ztest_shared->zs_name_lock);
(void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
@@ -1303,8 +1315,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
*/
if (ztest_random(2) == 0 &&
dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
- zr.zr_os = os;
- zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL);
+ zil_replay(os, os, ztest_replay_vector);
dmu_objset_close(os);
}
@@ -1436,7 +1447,8 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
error = dmu_objset_destroy(snapname);
if (error != 0 && error != ENOENT)
fatal(0, "dmu_objset_destroy() = %d", error);
- error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE);
+ error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1,
+ NULL, FALSE);
if (error == ENOSPC)
ztest_record_enospc("dmu_take_snapshot");
else if (error != 0 && error != EEXIST)
@@ -1474,11 +1486,15 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
(void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval++);
(void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval++);
- error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, FALSE);
- if (error == ENOSPC)
- ztest_record_enospc("dmu_take_snapshot");
- else if (error != 0 && error != EEXIST)
- fatal(0, "dmu_take_snapshot = %d", error);
+ error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1,
+ NULL, FALSE);
+ if (error && error != EEXIST) {
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ goto out;
+ }
+ fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
+ }
error = dmu_objset_open(snap1name, DMU_OST_OTHER,
DS_MODE_USER | DS_MODE_READONLY, &clone);
@@ -1487,23 +1503,34 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0,
NULL, NULL);
- if (error)
- fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);
dmu_objset_close(clone);
+ if (error) {
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ goto out;
+ }
+ fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);
+ }
error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1,
- FALSE);
- if (error == ENOSPC)
- ztest_record_enospc("dmu_take_snapshot");
- else if (error != 0 && error != EEXIST)
- fatal(0, "dmu_take_snapshot = %d", error);
+ NULL, FALSE);
+ if (error && error != EEXIST) {
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ goto out;
+ }
+ fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error);
+ }
error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1,
- FALSE);
- if (error == ENOSPC)
- ztest_record_enospc("dmu_take_snapshot");
- else if (error != 0 && error != EEXIST)
- fatal(0, "dmu_take_snapshot = %d", error);
+ NULL, FALSE);
+ if (error && error != EEXIST) {
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ goto out;
+ }
+ fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
+ }
error = dmu_objset_open(snap3name, DMU_OST_OTHER,
DS_MODE_USER | DS_MODE_READONLY, &clone);
@@ -1512,9 +1539,14 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0,
NULL, NULL);
- if (error)
- fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
dmu_objset_close(clone);
+ if (error) {
+ if (error == ENOSPC) {
+ ztest_record_enospc("dmu_objset_create");
+ goto out;
+ }
+ fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
+ }
error = dsl_dataset_own(snap1name, 0, FTAG, &ds);
if (error)
@@ -1525,23 +1557,24 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
error);
dsl_dataset_disown(ds, FTAG);
+out:
error = dmu_objset_destroy(clone2name);
- if (error)
+ if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
error = dmu_objset_destroy(snap3name);
- if (error)
+ if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
error = dmu_objset_destroy(snap2name);
- if (error)
+ if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
error = dmu_objset_destroy(clone1name);
- if (error)
+ if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
error = dmu_objset_destroy(snap1name);
- if (error)
+ if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
(void) rw_unlock(&ztest_shared->zs_name_lock);
@@ -1570,7 +1603,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
* Create a batch object if necessary, and record it in the directory.
*/
VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj));
+ sizeof (uint64_t), &batchobj, DMU_READ_PREFETCH));
if (batchobj == 0) {
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
@@ -1595,7 +1628,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
*/
for (b = 0; b < batchsize; b++) {
VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object));
+ sizeof (uint64_t), &object, DMU_READ_PREFETCH));
if (object == 0)
continue;
/*
@@ -1630,7 +1663,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
* We expect the word at endoff to be our object number.
*/
VERIFY(0 == dmu_read(os, object, endoff,
- sizeof (uint64_t), &temp));
+ sizeof (uint64_t), &temp, DMU_READ_PREFETCH));
if (temp != object) {
fatal(0, "bad data in %s, got %llu, expected %llu",
@@ -1815,7 +1848,7 @@ ztest_dmu_read_write(ztest_args_t *za)
* Read the directory info. If it's the first time, set things up.
*/
VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (dd), &dd));
+ sizeof (dd), &dd, DMU_READ_PREFETCH));
if (dd.dd_chunk == 0) {
ASSERT(dd.dd_packobj == 0);
ASSERT(dd.dd_bigobj == 0);
@@ -1877,9 +1910,11 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the current contents of our objects.
*/
- error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
+ error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf,
+ DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
- error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
+ error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf,
+ DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
/*
@@ -1985,9 +2020,9 @@ ztest_dmu_read_write(ztest_args_t *za)
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
- packsize, packcheck));
+ packsize, packcheck, DMU_READ_PREFETCH));
VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
- bigsize, bigcheck));
+ bigsize, bigcheck, DMU_READ_PREFETCH));
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
@@ -2001,6 +2036,314 @@ ztest_dmu_read_write(ztest_args_t *za)
}
void
+compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
+ uint64_t bigsize, uint64_t n, dmu_read_write_dir_t dd, uint64_t txg)
+{
+ uint64_t i;
+ bufwad_t *pack;
+ bufwad_t *bigH;
+ bufwad_t *bigT;
+
+ /*
+ * For each index from n to n + s, verify that the existing bufwad
+ * in packobj matches the bufwads at the head and tail of the
+ * corresponding chunk in bigobj. Then update all three bufwads
+ * with the new values we want to write out.
+ */
+ for (i = 0; i < s; i++) {
+ /* LINTED */
+ pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
+ /* LINTED */
+ bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
+ /* LINTED */
+ bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
+
+ ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
+ ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
+
+ if (pack->bw_txg > txg)
+ fatal(0, "future leak: got %llx, open txg is %llx",
+ pack->bw_txg, txg);
+
+ if (pack->bw_data != 0 && pack->bw_index != n + i)
+ fatal(0, "wrong index: got %llx, wanted %llx+%llx",
+ pack->bw_index, n, i);
+
+ if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
+ fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);
+
+ if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
+ fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);
+
+ pack->bw_index = n + i;
+ pack->bw_txg = txg;
+ pack->bw_data = 1 + ztest_random(-2ULL);
+
+ *bigH = *pack;
+ *bigT = *pack;
+ }
+}
+
+void
+ztest_dmu_read_write_zcopy(ztest_args_t *za)
+{
+ objset_t *os = za->za_os;
+ dmu_read_write_dir_t dd;
+ dmu_tx_t *tx;
+ uint64_t i;
+ int error;
+ uint64_t n, s, txg;
+ bufwad_t *packbuf, *bigbuf;
+ uint64_t packoff, packsize, bigoff, bigsize;
+ uint64_t regions = 997;
+ uint64_t stride = 123456789ULL;
+ uint64_t width = 9;
+ dmu_buf_t *bonus_db;
+ arc_buf_t **bigbuf_arcbufs;
+ dmu_object_info_t *doi = &za->za_doi;
+
+ /*
+ * This test uses two objects, packobj and bigobj, that are always
+ * updated together (i.e. in the same tx) so that their contents are
+ * in sync and can be compared. Their contents relate to each other
+ * in a simple way: packobj is a dense array of 'bufwad' structures,
+ * while bigobj is a sparse array of the same bufwads. Specifically,
+ * for any index n, there are three bufwads that should be identical:
+ *
+ * packobj, at offset n * sizeof (bufwad_t)
+ * bigobj, at the head of the nth chunk
+ * bigobj, at the tail of the nth chunk
+ *
+ * The chunk size is set equal to bigobj block size so that
+ * dmu_assign_arcbuf() can be tested for object updates.
+ */
+
+ /*
+ * Read the directory info. If it's the first time, set things up.
+ */
+ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (dd), &dd, DMU_READ_PREFETCH));
+ if (dd.dd_chunk == 0) {
+ ASSERT(dd.dd_packobj == 0);
+ ASSERT(dd.dd_bigobj == 0);
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
+ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ ztest_record_enospc("create r/w directory");
+ dmu_tx_abort(tx);
+ return;
+ }
+
+ dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+ DMU_OT_NONE, 0, tx);
+ dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+ DMU_OT_NONE, 0, tx);
+ ztest_set_random_blocksize(os, dd.dd_packobj, tx);
+ ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
+
+ VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0);
+ ASSERT(doi->doi_data_block_size >= 2 * sizeof (bufwad_t));
+ ASSERT(ISP2(doi->doi_data_block_size));
+ dd.dd_chunk = doi->doi_data_block_size;
+
+ dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
+ tx);
+ dmu_tx_commit(tx);
+ } else {
+ VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0);
+ VERIFY(ISP2(doi->doi_data_block_size));
+ VERIFY(dd.dd_chunk == doi->doi_data_block_size);
+ VERIFY(dd.dd_chunk >= 2 * sizeof (bufwad_t));
+ }
+
+ /*
+ * Pick a random index and compute the offsets into packobj and bigobj.
+ */
+ n = ztest_random(regions) * stride + ztest_random(width);
+ s = 1 + ztest_random(width - 1);
+
+ packoff = n * sizeof (bufwad_t);
+ packsize = s * sizeof (bufwad_t);
+
+ bigoff = n * dd.dd_chunk;
+ bigsize = s * dd.dd_chunk;
+
+ packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
+ bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);
+
+ VERIFY(dmu_bonus_hold(os, dd.dd_bigobj, FTAG, &bonus_db) == 0);
+
+ bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);
+
+ /*
+ * Iteration 0 test zcopy for DB_UNCACHED dbufs.
+ * Iteration 1 test zcopy to already referenced dbufs.
+ * Iteration 2 test zcopy to dirty dbuf in the same txg.
+ * Iteration 3 test zcopy to dbuf dirty in previous txg.
+ * Iteration 4 test zcopy when dbuf is no longer dirty.
+ * Iteration 5 test zcopy when it can't be done.
+ * Iteration 6 one more zcopy write.
+ */
+ for (i = 0; i < 7; i++) {
+ uint64_t j;
+ uint64_t off;
+
+ /*
+ * In iteration 5 (i == 5) use arcbufs
+ * that don't match bigobj blksz to test
+ * dmu_assign_arcbuf() when it can't directly
+ * assign an arcbuf to a dbuf.
+ */
+ for (j = 0; j < s; j++) {
+ if (i != 5) {
+ bigbuf_arcbufs[j] =
+ dmu_request_arcbuf(bonus_db,
+ dd.dd_chunk);
+ } else {
+ bigbuf_arcbufs[2 * j] =
+ dmu_request_arcbuf(bonus_db,
+ dd.dd_chunk / 2);
+ bigbuf_arcbufs[2 * j + 1] =
+ dmu_request_arcbuf(bonus_db,
+ dd.dd_chunk / 2);
+ }
+ }
+
+ /*
+ * Get a tx for the mods to both packobj and bigobj.
+ */
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
+ dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
+
+ if (ztest_random(100) == 0) {
+ error = -1;
+ } else {
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ }
+
+ if (error) {
+ if (error != -1) {
+ ztest_record_enospc("dmu r/w range");
+ }
+ dmu_tx_abort(tx);
+ umem_free(packbuf, packsize);
+ umem_free(bigbuf, bigsize);
+ for (j = 0; j < s; j++) {
+ if (i != 5) {
+ dmu_return_arcbuf(bigbuf_arcbufs[j]);
+ } else {
+ dmu_return_arcbuf(
+ bigbuf_arcbufs[2 * j]);
+ dmu_return_arcbuf(
+ bigbuf_arcbufs[2 * j + 1]);
+ }
+ }
+ umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
+ dmu_buf_rele(bonus_db, FTAG);
+ return;
+ }
+
+ txg = dmu_tx_get_txg(tx);
+
+ /*
+ * 50% of the time don't read objects in the 1st iteration to
+ * test dmu_assign_arcbuf() for the case when there are no
+ * existing dbufs for the specified offsets.
+ */
+ if (i != 0 || ztest_random(2) != 0) {
+ error = dmu_read(os, dd.dd_packobj, packoff,
+ packsize, packbuf, DMU_READ_PREFETCH);
+ ASSERT3U(error, ==, 0);
+ error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize,
+ bigbuf, DMU_READ_PREFETCH);
+ ASSERT3U(error, ==, 0);
+ }
+ compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
+ n, dd, txg);
+
+ /*
+ * We've verified all the old bufwads, and made new ones.
+ * Now write them out.
+ */
+ dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
+ if (zopt_verbose >= 6) {
+ (void) printf("writing offset %llx size %llx"
+ " txg %llx\n",
+ (u_longlong_t)bigoff,
+ (u_longlong_t)bigsize,
+ (u_longlong_t)txg);
+ }
+ for (off = bigoff, j = 0; j < s; j++, off += dd.dd_chunk) {
+ dmu_buf_t *dbt;
+ if (i != 5) {
+ bcopy((caddr_t)bigbuf + (off - bigoff),
+ bigbuf_arcbufs[j]->b_data, dd.dd_chunk);
+ } else {
+ bcopy((caddr_t)bigbuf + (off - bigoff),
+ bigbuf_arcbufs[2 * j]->b_data,
+ dd.dd_chunk / 2);
+ bcopy((caddr_t)bigbuf + (off - bigoff) +
+ dd.dd_chunk / 2,
+ bigbuf_arcbufs[2 * j + 1]->b_data,
+ dd.dd_chunk / 2);
+ }
+
+ if (i == 1) {
+ VERIFY(dmu_buf_hold(os, dd.dd_bigobj, off,
+ FTAG, &dbt) == 0);
+ }
+ if (i != 5) {
+ dmu_assign_arcbuf(bonus_db, off,
+ bigbuf_arcbufs[j], tx);
+ } else {
+ dmu_assign_arcbuf(bonus_db, off,
+ bigbuf_arcbufs[2 * j], tx);
+ dmu_assign_arcbuf(bonus_db,
+ off + dd.dd_chunk / 2,
+ bigbuf_arcbufs[2 * j + 1], tx);
+ }
+ if (i == 1) {
+ dmu_buf_rele(dbt, FTAG);
+ }
+ }
+ dmu_tx_commit(tx);
+
+ /*
+ * Sanity check the stuff we just wrote.
+ */
+ {
+ void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
+ void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
+
+ VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
+ packsize, packcheck, DMU_READ_PREFETCH));
+ VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
+ bigsize, bigcheck, DMU_READ_PREFETCH));
+
+ ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
+ ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
+
+ umem_free(packcheck, packsize);
+ umem_free(bigcheck, bigsize);
+ }
+ if (i == 2) {
+ txg_wait_open(dmu_objset_pool(os), 0);
+ } else if (i == 3) {
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ }
+ }
+
+ dmu_buf_rele(bonus_db, FTAG);
+ umem_free(packbuf, packsize);
+ umem_free(bigbuf, bigsize);
+ umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
+}
+
+void
ztest_dmu_check_future_leak(ztest_args_t *za)
{
objset_t *os = za->za_os;
@@ -2049,6 +2392,8 @@ ztest_dmu_write_parallel(ztest_args_t *za)
uint64_t blkoff;
zbookmark_t zb;
dmu_tx_t *tx = dmu_tx_create(os);
+ dmu_buf_t *bonus_db;
+ arc_buf_t *abuf = NULL;
dmu_objset_name(os, osname);
@@ -2077,6 +2422,12 @@ ztest_dmu_write_parallel(ztest_args_t *za)
}
}
+ if (off != -1ULL && P2PHASE(off, bs) == 0 && !do_free &&
+ ztest_random(8) == 0) {
+ VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &bonus_db) == 0);
+ abuf = dmu_request_arcbuf(bonus_db, bs);
+ }
+
txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
error = dmu_tx_assign(tx, txg_how);
if (error) {
@@ -2087,6 +2438,10 @@ ztest_dmu_write_parallel(ztest_args_t *za)
ztest_record_enospc("dmu write parallel");
}
dmu_tx_abort(tx);
+ if (abuf != NULL) {
+ dmu_return_arcbuf(abuf);
+ dmu_buf_rele(bonus_db, FTAG);
+ }
return;
}
txg = dmu_tx_get_txg(tx);
@@ -2141,8 +2496,12 @@ ztest_dmu_write_parallel(ztest_args_t *za)
za->za_dbuf = NULL;
} else if (do_free) {
VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0);
- } else {
+ } else if (abuf == NULL) {
dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx);
+ } else {
+ bcopy(wbt, abuf->b_data, btsize);
+ dmu_assign_arcbuf(bonus_db, off, abuf, tx);
+ dmu_buf_rele(bonus_db, FTAG);
}
(void) mutex_unlock(lp);
@@ -2170,8 +2529,6 @@ ztest_dmu_write_parallel(ztest_args_t *za)
error = dmu_buf_hold(os, ZTEST_DIROBJ, blkoff, FTAG, &db);
za->za_dbuf = db;
if (error) {
- dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n",
- osname, ZTEST_DIROBJ, blkoff, error);
(void) mutex_unlock(lp);
return;
}
@@ -2180,19 +2537,20 @@ ztest_dmu_write_parallel(ztest_args_t *za)
dmu_buf_rele(db, FTAG);
za->za_dbuf = NULL;
- (void) mutex_unlock(lp);
-
if (error) {
- dprintf("dmu_sync(%s, %d, %llx) = %d\n",
- osname, ZTEST_DIROBJ, off, error);
+ (void) mutex_unlock(lp);
return;
}
- if (blk.blk_birth == 0) /* concurrent free */
+ if (blk.blk_birth == 0) { /* concurrent free */
+ (void) mutex_unlock(lp);
return;
+ }
txg_suspend(dmu_objset_pool(os));
+ (void) mutex_unlock(lp);
+
ASSERT(blk.blk_fill == 1);
ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
@@ -2265,7 +2623,7 @@ ztest_zap(ztest_args_t *za)
* Create a new object if necessary, and record it in the directory.
*/
VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &object));
+ sizeof (uint64_t), &object, DMU_READ_PREFETCH));
if (object == 0) {
tx = dmu_tx_create(os);
@@ -2444,6 +2802,102 @@ ztest_zap(ztest_args_t *za)
dmu_tx_commit(tx);
}
+/*
+ * Test case for the upgrade of a microzap to a fatzap.
+ */
+void
+ztest_fzap(ztest_args_t *za)
+{
+ objset_t *os = za->za_os;
+ uint64_t object;
+ uint64_t value;
+ dmu_tx_t *tx;
+ int i, error;
+ char osname[MAXNAMELEN];
+ char *name = "aaa";
+ char entname[MAXNAMELEN];
+
+ dmu_objset_name(os, osname);
+
+ /*
+ * Create a new object if necessary, and record it in the directory.
+ */
+ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (uint64_t), &object, DMU_READ_PREFETCH));
+
+ if (object == 0) {
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (uint64_t));
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ ztest_record_enospc("create zap test obj");
+ dmu_tx_abort(tx);
+ return;
+ }
+ object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx);
+ if (error) {
+ fatal(0, "zap_create('%s', %llu) = %d",
+ osname, object, error);
+ }
+ ASSERT(object != 0);
+ dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (uint64_t), &object, tx);
+ dmu_tx_commit(tx);
+ }
+
+ /*
+ * Add entries to this ZAP and make sure it spills over
+ * and gets upgraded to a fatzap. Also, since we are adding
+ * 2050 entries we should see ptrtbl growth and leaf-block
+ * split.
+ */
+ for (i = 0; i < 2050; i++) {
+ (void) snprintf(entname, sizeof (entname), "%s-%d", name, i);
+ value = i;
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_zap(tx, object, TRUE, entname);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+
+ if (error) {
+ ztest_record_enospc("create zap entry");
+ dmu_tx_abort(tx);
+ return;
+ }
+ error = zap_add(os, object, entname, sizeof (uint64_t),
+ 1, &value, tx);
+
+ ASSERT(error == 0 || error == EEXIST);
+ dmu_tx_commit(tx);
+ }
+
+ /*
+ * Once in a while, destroy the object.
+ */
+ if (ztest_random(1000) != 0)
+ return;
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t));
+ dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ ztest_record_enospc("destroy zap object");
+ dmu_tx_abort(tx);
+ return;
+ }
+ error = zap_destroy(os, object, tx);
+ if (error)
+ fatal(0, "zap_destroy('%s', %llu) = %d",
+ osname, object, error);
+ object = 0;
+ dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t),
+ &object, tx);
+ dmu_tx_commit(tx);
+}
+
void
ztest_zap_parallel(ztest_args_t *za)
{
@@ -2695,8 +3149,6 @@ ztest_fault_inject(ztest_args_t *za)
maxfaults = INT_MAX; /* no limit on cache devices */
}
- dprintf("damaging %s and %s\n", path0, pathrand);
-
spa_config_exit(spa, SCL_STATE, FTAG);
if (maxfaults == 0)
@@ -2706,10 +3158,13 @@ ztest_fault_inject(ztest_args_t *za)
* If we can tolerate two or more faults, randomly online/offline vd0.
*/
if (maxfaults >= 2 && guid0 != 0) {
- if (ztest_random(10) < 6)
- (void) vdev_offline(spa, guid0, B_TRUE);
- else
- (void) vdev_online(spa, guid0, B_FALSE, NULL);
+ if (ztest_random(10) < 6) {
+ int flags = (ztest_random(2) == 0 ?
+ ZFS_OFFLINE_TEMPORARY : 0);
+ VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
+ } else {
+ (void) vdev_online(spa, guid0, 0, NULL);
+ }
}
/*
@@ -2918,7 +3373,7 @@ ztest_verify_blocks(char *pool)
isa = strdup(isa);
/* LINTED */
(void) sprintf(bin,
- "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s",
+ "/usr/sbin%.*s/zdb -bcc%s%s -U /tmp/zpool.cache %s",
isalen,
isa,
zopt_verbose >= 3 ? "s" : "",
@@ -2966,7 +3421,7 @@ ztest_walk_pool_directory(char *header)
static void
ztest_spa_import_export(char *oldname, char *newname)
{
- nvlist_t *config;
+ nvlist_t *config, *newconfig;
uint64_t pool_guid;
spa_t *spa;
int error;
@@ -2988,6 +3443,12 @@ ztest_spa_import_export(char *oldname, char *newname)
if (error)
fatal(0, "spa_open('%s') = %d", oldname, error);
+ /*
+ * Kick off a scrub to tickle scrub/export races.
+ */
+ if (ztest_random(2) == 0)
+ (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
+
pool_guid = spa_guid(spa);
spa_close(spa, FTAG);
@@ -3003,6 +3464,13 @@ ztest_spa_import_export(char *oldname, char *newname)
ztest_walk_pool_directory("pools after export");
/*
+ * Try to import it.
+ */
+ newconfig = spa_tryimport(config);
+ ASSERT(newconfig != NULL);
+ nvlist_free(newconfig);
+
+ /*
* Import it under the new name.
*/
error = spa_import(newname, config, NULL);
@@ -3044,22 +3512,25 @@ ztest_spa_import_export(char *oldname, char *newname)
nvlist_free(config);
}
+static void
+ztest_resume(spa_t *spa)
+{
+ if (spa_suspended(spa)) {
+ spa_vdev_state_enter(spa);
+ vdev_clear(spa, NULL);
+ (void) spa_vdev_state_exit(spa, NULL, 0);
+ (void) zio_resume(spa);
+ }
+}
+
static void *
-ztest_resume(void *arg)
+ztest_resume_thread(void *arg)
{
spa_t *spa = arg;
while (!ztest_exiting) {
(void) poll(NULL, 0, 1000);
-
- if (!spa_suspended(spa))
- continue;
-
- spa_vdev_state_enter(spa);
- vdev_clear(spa, NULL);
- (void) spa_vdev_state_exit(spa, NULL, 0);
-
- zio_resume(spa);
+ ztest_resume(spa);
}
return (NULL);
}
@@ -3202,9 +3673,19 @@ ztest_run(char *pool)
VERIFY(spa_open(pool, &spa, FTAG) == 0);
/*
+ * We don't expect the pool to suspend unless maxfaults == 0,
+ * in which case ztest_fault_inject() temporarily takes away
+ * the only valid replica.
+ */
+ if (zopt_maxfaults == 0)
+ spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
+ else
+ spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
+
+ /*
* Create a thread to periodically resume suspended I/O.
*/
- VERIFY(thr_create(0, 0, ztest_resume, spa, THR_BOUND,
+ VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND,
&resume_tid) == 0);
/*
@@ -3253,7 +3734,6 @@ ztest_run(char *pool)
za[t].za_kill = za[0].za_kill;
if (t < zopt_datasets) {
- ztest_replay_t zr;
int test_future = FALSE;
(void) rw_rdlock(&ztest_shared->zs_name_lock);
(void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
@@ -3277,9 +3757,8 @@ ztest_run(char *pool)
(void) rw_unlock(&ztest_shared->zs_name_lock);
if (test_future)
ztest_dmu_check_future_leak(&za[t]);
- zr.zr_os = za[d].za_os;
- zil_replay(zr.zr_os, &zr, &zr.zr_assign,
- ztest_replay_vector, NULL);
+ zil_replay(za[d].za_os, za[d].za_os,
+ ztest_replay_vector);
za[d].za_zilog = zil_open(za[d].za_os, NULL);
}
@@ -3324,6 +3803,7 @@ ztest_run(char *pool)
/* Kill the resume thread */
ztest_exiting = B_TRUE;
VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
+ ztest_resume(spa);
/*
* Right before closing the pool, kick off a bunch of async I/O;
@@ -3391,6 +3871,8 @@ ztest_init(char *pool)
if (error)
fatal(0, "spa_open() = %d", error);
+ metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
+
if (zopt_verbose >= 3)
show_pool_stats(spa);
@@ -3419,11 +3901,6 @@ main(int argc, char **argv)
process_options(argc, argv);
- argc -= optind;
- argv += optind;
-
- dprintf_setup(&argc, argv);
-
/*
* Blow away any existing copy of zpool.cache
*/
@@ -3487,6 +3964,9 @@ main(int argc, char **argv)
zi->zi_call_time = 0;
}
+ /* Set the allocation switch size */
+ metaslab_df_alloc_threshold = ztest_random(metaslab_sz / 4) + 1;
+
pid = fork();
if (pid == -1)
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
index a77317ef9fae..3f7abd2f17fe 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@@ -29,6 +29,7 @@
#include <assert.h>
#include <libnvpair.h>
+#include <sys/mnttab.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/varargs.h>
@@ -175,6 +176,14 @@ extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
extern int libzfs_errno(libzfs_handle_t *);
extern const char *libzfs_error_action(libzfs_handle_t *);
extern const char *libzfs_error_description(libzfs_handle_t *);
+extern void libzfs_mnttab_init(libzfs_handle_t *);
+extern void libzfs_mnttab_fini(libzfs_handle_t *);
+extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t);
+extern int libzfs_mnttab_find(libzfs_handle_t *, const char *,
+ struct mnttab *);
+extern void libzfs_mnttab_add(libzfs_handle_t *, const char *,
+ const char *, const char *);
+extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *);
/*
* Basic handle functions
@@ -256,9 +265,15 @@ typedef enum {
ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
+ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
+
+ /*
+ * These faults have no corresponding message ID. At the time we are
+ * checking the status, the original reason for the FMA fault (I/O or
+ * checksum errors) has been lost.
+ */
ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */
ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */
- ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
/*
* The following are not faults per se, but still an error possibly
@@ -354,6 +369,10 @@ extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
zprop_source_t *, char *, size_t, boolean_t);
extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
zprop_source_t *, char *, size_t);
+extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
+ uint64_t *propvalue);
+extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
+ char *propbuf, int proplen, boolean_t literal);
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
extern int zfs_prop_inherit(zfs_handle_t *, const char *);
extern const char *zfs_prop_values(zfs_prop_t);
@@ -441,6 +460,12 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *,
boolean_t, boolean_t, boolean_t, boolean_t, int);
extern int zfs_promote(zfs_handle_t *);
+typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
+ uid_t rid, uint64_t space);
+
+extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
+ zfs_userspace_cb_t func, void *arg);
+
typedef struct recvflags {
/* print informational messages (ie, -v was specified) */
int verbose : 1;
@@ -479,17 +504,6 @@ extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
extern int zfs_spa_version(zfs_handle_t *, int *);
/*
- * dataset permission functions.
- */
-extern int zfs_perm_set(zfs_handle_t *, nvlist_t *);
-extern int zfs_perm_remove(zfs_handle_t *, nvlist_t *);
-extern int zfs_build_perms(zfs_handle_t *, char *, char *,
- zfs_deleg_who_type_t, zfs_deleg_inherit_t, nvlist_t **nvlist_t);
-extern int zfs_perm_get(zfs_handle_t *, zfs_allow_t **);
-extern void zfs_free_allows(zfs_allow_t *);
-extern void zfs_deleg_permissions(void);
-
-/*
* Mount support functions.
*/
extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
@@ -525,7 +539,7 @@ extern int zfs_unshare_iscsi(zfs_handle_t *);
#ifdef TODO
extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
#endif
-extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *,
+extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
void *, void *, int, zfs_share_op_t);
/*
@@ -571,6 +585,15 @@ extern int zpool_remove_zvol_links(zpool_handle_t *);
extern int zvol_check_dump_config(char *);
/*
+ * Management interfaces for SMB ACL files
+ */
+
+int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *);
+int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *);
+int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *);
+int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
+
+/*
* Enable and disable datasets within a pool by mounting/unmounting and
* sharing/unsharing them.
*/
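A hedged sketch of driving the zfs_userspace() iterator declared above with a zfs_userspace_cb_t callback. Function and variable names are invented, stdio and libzfs headers are assumed, ZFS_PROP_USERUSED is assumed to be one of the zfs_userquota_prop_t values, and whether a non-zero callback return aborts the walk is not spelled out by this header.

static int
example_userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
{
	uint64_t *total = arg;

	/* one callback per {domain, rid} identity, with its charged space */
	(void) printf("domain=%s rid=%u used=%llu\n",
	    domain, (unsigned)rid, (u_longlong_t)space);
	*total += space;
	return (0);
}

static void
example_print_userused(zfs_handle_t *zhp)
{
	uint64_t total = 0;

	(void) zfs_userspace(zhp, ZFS_PROP_USERUSED,
	    example_userspace_cb, &total);
	(void) printf("total used: %llu bytes\n", (u_longlong_t)total);
}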
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
index b905bc6cb6af..6fa196710983 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Portions Copyright 2007 Ramprakash Jelari
@@ -218,6 +218,7 @@ changelist_postfix(prop_changelist_t *clp)
boolean_t sharenfs;
boolean_t sharesmb;
+ boolean_t mounted;
/*
* If we are in the global zone, but this dataset is exported
@@ -272,20 +273,29 @@ changelist_postfix(prop_changelist_t *clp)
shareopts, sizeof (shareopts), NULL, NULL, 0,
B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
- if ((cn->cn_mounted || clp->cl_waslegacy || sharenfs ||
- sharesmb) && !zfs_is_mounted(cn->cn_handle, NULL) &&
- zfs_mount(cn->cn_handle, NULL, 0) != 0)
- errors++;
+ mounted = zfs_is_mounted(cn->cn_handle, NULL);
+
+ if (!mounted && (cn->cn_mounted ||
+ ((sharenfs || sharesmb || clp->cl_waslegacy) &&
+ (zfs_prop_get_int(cn->cn_handle,
+ ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {
+
+ if (zfs_mount(cn->cn_handle, NULL, 0) != 0)
+ errors++;
+ else
+ mounted = TRUE;
+ }
/*
- * We always re-share even if the filesystem is currently
- * shared, so that we can adopt any new options.
+ * If the file system is mounted we always re-share even
+ * if the filesystem is currently shared, so that we can
+ * adopt any new options.
*/
- if (sharenfs)
+ if (sharenfs && mounted)
errors += zfs_share_nfs(cn->cn_handle);
else if (cn->cn_shared || clp->cl_waslegacy)
errors += zfs_unshare_nfs(cn->cn_handle, NULL);
- if (sharesmb)
+ if (sharesmb && mounted)
errors += zfs_share_smb(cn->cn_handle);
else if (cn->cn_shared || clp->cl_waslegacy)
errors += zfs_unshare_smb(cn->cn_handle, NULL);
@@ -621,8 +631,6 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
} else if (prop == ZFS_PROP_VOLSIZE) {
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- } else if (prop == ZFS_PROP_VERSION) {
- clp->cl_prop = ZFS_PROP_MOUNTPOINT;
} else {
clp->cl_prop = prop;
}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
index b1a2c7ae1d9a..c2f0f0368045 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
@@ -37,16 +37,17 @@
#include <zone.h>
#include <fcntl.h>
#include <sys/mntent.h>
-#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/avl.h>
#include <priv.h>
#include <pwd.h>
#include <grp.h>
#include <stddef.h>
+#include <idmap.h>
#include <sys/spa.h>
#include <sys/zap.h>
+#include <sys/misc.h>
#include <libzfs.h>
#include "zfs_namecheck.h"
@@ -55,6 +56,8 @@
#include "zfs_deleg.h"
static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
+static int userquota_propname_decode(const char *propname, boolean_t zoned,
+ zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
/*
* Given a single type (not a mask of types), return the type in a human
@@ -106,7 +109,6 @@ path_to_str(const char *path, int types)
return (path_to_str(path, types & ~ZFS_TYPE_SNAPSHOT));
}
-
/*
* The user has requested either filesystems or volumes.
* We have no way of knowing a priori what type this would be, so always
@@ -121,8 +123,8 @@ path_to_str(const char *path, int types)
/*
* Validate a ZFS path. This is used even before trying to open the dataset, to
- * provide a more meaningful error message. We place a more useful message in
- * 'buf' detailing exactly why the name was not valid.
+ * provide a more meaningful error message. We call zfs_error_aux() to
+ * explain exactly why the name was not valid.
*/
static int
zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
@@ -316,39 +318,39 @@ zpool_free_handles(libzfs_handle_t *hdl)
/*
* Utility function to gather stats (objset and zpl) for the given object.
*/
-static int
-get_stats(zfs_handle_t *zhp)
+get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
- zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
- nvlist_t *allprops, *userprops;
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+ (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
- if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
- return (-1);
-
- while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+ while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) {
if (errno == ENOMEM) {
- if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
+ if (zcmd_expand_dst_nvlist(hdl, zc) != 0) {
return (-1);
}
} else {
- zcmd_free_nvlists(&zc);
return (-1);
}
}
+ return (0);
+}
- zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
+static int
+put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc)
+{
+ nvlist_t *allprops, *userprops;
- if (zcmd_read_dst_nvlist(hdl, &zc, &allprops) != 0) {
- zcmd_free_nvlists(&zc);
+ zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */
+
+ if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) {
return (-1);
}
- zcmd_free_nvlists(&zc);
-
+ /*
+ * XXX Why do we store the user props separately, in addition to
+ * storing them in zfs_props?
+ */
if ((userprops = process_user_props(zhp, allprops)) == NULL) {
nvlist_free(allprops);
return (-1);
@@ -363,6 +365,22 @@ get_stats(zfs_handle_t *zhp)
return (0);
}
+static int
+get_stats(zfs_handle_t *zhp)
+{
+ int rc = 0;
+ zfs_cmd_t zc = { 0 };
+
+ if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+ return (-1);
+ if (get_stats_ioctl(zhp, &zc) != 0)
+ rc = -1;
+ else if (put_stats_zhdl(zhp, &zc) != 0)
+ rc = -1;
+ zcmd_free_nvlists(&zc);
+ return (rc);
+}
+
/*
* Refresh the properties currently stored in the handle.
*/
@@ -376,16 +394,11 @@ zfs_refresh_properties(zfs_handle_t *zhp)
* Makes a handle from the given dataset name. Used by zfs_open() and
* zfs_iter_* to create child handles on the fly.
*/
-zfs_handle_t *
-make_dataset_handle(libzfs_handle_t *hdl, const char *path)
+static int
+make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
- zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
char *logstr;
-
- if (zhp == NULL)
- return (NULL);
-
- zhp->zfs_hdl = hdl;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
/*
* Preserve history log string.
@@ -394,17 +407,16 @@ make_dataset_handle(libzfs_handle_t *hdl, const char *path)
*/
logstr = zhp->zfs_hdl->libzfs_log_str;
zhp->zfs_hdl->libzfs_log_str = NULL;
-top:
- (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
- if (get_stats(zhp) != 0) {
+top:
+ if (put_stats_zhdl(zhp, zc) != 0) {
zhp->zfs_hdl->libzfs_log_str = logstr;
- free(zhp);
- return (NULL);
+ return (-1);
}
+
if (zhp->zfs_dmustats.dds_inconsistent) {
- zfs_cmd_t zc = { 0 };
+ zfs_cmd_t zc2 = { 0 };
/*
* If it is dds_inconsistent, then we've caught it in
@@ -421,28 +433,33 @@ top:
* will fail with EBUSY and we will drive on as usual.
*/
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+ (void) strlcpy(zc2.zc_name, zhp->zfs_name,
+ sizeof (zc2.zc_name));
if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
(void) zvol_remove_link(hdl, zhp->zfs_name);
- zc.zc_objset_type = DMU_OST_ZVOL;
+ zc2.zc_objset_type = DMU_OST_ZVOL;
} else {
- zc.zc_objset_type = DMU_OST_ZFS;
+ zc2.zc_objset_type = DMU_OST_ZFS;
}
/*
* If we can successfully destroy it, pretend that it
* never existed.
*/
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) {
zhp->zfs_hdl->libzfs_log_str = logstr;
- free(zhp);
errno = ENOENT;
- return (NULL);
+ return (-1);
}
- /* If we can successfully roll it back, reget the stats */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
+ /* If we can successfully roll it back, reset the stats */
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) {
+ if (get_stats_ioctl(zhp, zc) != 0) {
+ zhp->zfs_hdl->libzfs_log_str = logstr;
+ return (-1);
+ }
goto top;
+ }
}
/*
@@ -467,6 +484,52 @@ top:
zhp->zfs_hdl->libzfs_log_str = logstr;
zhp->zpool_hdl = zpool_handle(zhp);
+ return (0);
+}
+
+zfs_handle_t *
+make_dataset_handle(libzfs_handle_t *hdl, const char *path)
+{
+ zfs_cmd_t zc = { 0 };
+
+ zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
+
+ if (zhp == NULL)
+ return (NULL);
+
+ zhp->zfs_hdl = hdl;
+ (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
+ if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) {
+ free(zhp);
+ return (NULL);
+ }
+ if (get_stats_ioctl(zhp, &zc) == -1) {
+ zcmd_free_nvlists(&zc);
+ free(zhp);
+ return (NULL);
+ }
+ if (make_dataset_handle_common(zhp, &zc) == -1) {
+ free(zhp);
+ zhp = NULL;
+ }
+ zcmd_free_nvlists(&zc);
+ return (zhp);
+}
+
+static zfs_handle_t *
+make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+ zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
+
+ if (zhp == NULL)
+ return (NULL);
+
+ zhp->zfs_hdl = hdl;
+ (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name));
+ if (make_dataset_handle_common(zhp, zc) == -1) {
+ free(zhp);
+ return (NULL);
+ }
return (zhp);
}
@@ -525,6 +588,141 @@ zfs_close(zfs_handle_t *zhp)
free(zhp);
}
+typedef struct mnttab_node {
+ struct mnttab mtn_mt;
+ avl_node_t mtn_node;
+} mnttab_node_t;
+
+static int
+libzfs_mnttab_cache_compare(const void *arg1, const void *arg2)
+{
+ const mnttab_node_t *mtn1 = arg1;
+ const mnttab_node_t *mtn2 = arg2;
+ int rv;
+
+ rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special);
+
+ if (rv == 0)
+ return (0);
+ return (rv > 0 ? 1 : -1);
+}
+
+void
+libzfs_mnttab_init(libzfs_handle_t *hdl)
+{
+ assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0);
+ avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare,
+ sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
+}
+
+void
+libzfs_mnttab_update(libzfs_handle_t *hdl)
+{
+ struct mnttab entry;
+
+ rewind(hdl->libzfs_mnttab);
+ while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
+ mnttab_node_t *mtn;
+
+ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
+ continue;
+ mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
+ mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special);
+ mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp);
+ mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, entry.mnt_fstype);
+ mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts);
+ avl_add(&hdl->libzfs_mnttab_cache, mtn);
+ }
+}
+
+void
+libzfs_mnttab_fini(libzfs_handle_t *hdl)
+{
+ void *cookie = NULL;
+ mnttab_node_t *mtn;
+
+ while (mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie)) {
+ free(mtn->mtn_mt.mnt_special);
+ free(mtn->mtn_mt.mnt_mountp);
+ free(mtn->mtn_mt.mnt_fstype);
+ free(mtn->mtn_mt.mnt_mntopts);
+ free(mtn);
+ }
+ avl_destroy(&hdl->libzfs_mnttab_cache);
+}
+
+void
+libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable)
+{
+ hdl->libzfs_mnttab_enable = enable;
+}
+
+int
+libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
+ struct mnttab *entry)
+{
+ mnttab_node_t find;
+ mnttab_node_t *mtn;
+
+ if (!hdl->libzfs_mnttab_enable) {
+ struct mnttab srch = { 0 };
+
+ if (avl_numnodes(&hdl->libzfs_mnttab_cache))
+ libzfs_mnttab_fini(hdl);
+ rewind(hdl->libzfs_mnttab);
+ srch.mnt_special = (char *)fsname;
+ srch.mnt_fstype = MNTTYPE_ZFS;
+ if (getmntany(hdl->libzfs_mnttab, entry, &srch) == 0)
+ return (0);
+ else
+ return (ENOENT);
+ }
+
+ if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
+ libzfs_mnttab_update(hdl);
+
+ find.mtn_mt.mnt_special = (char *)fsname;
+ mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
+ if (mtn) {
+ *entry = mtn->mtn_mt;
+ return (0);
+ }
+ return (ENOENT);
+}
+
+void
+libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special,
+ const char *mountp, const char *mntopts)
+{
+ mnttab_node_t *mtn;
+
+ if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
+ return;
+ mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
+ mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
+ mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
+ mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
+ mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
+ avl_add(&hdl->libzfs_mnttab_cache, mtn);
+}
+
+void
+libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
+{
+ mnttab_node_t find;
+ mnttab_node_t *ret;
+
+ find.mtn_mt.mnt_special = (char *)fsname;
+ if (ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) {
+ avl_remove(&hdl->libzfs_mnttab_cache, ret);
+ free(ret->mtn_mt.mnt_special);
+ free(ret->mtn_mt.mnt_mountp);
+ free(ret->mtn_mt.mnt_fstype);
+ free(ret->mtn_mt.mnt_mntopts);
+ free(ret);
+ }
+}
+
int
zfs_spa_version(zfs_handle_t *zhp, int *spa_version)
{
@@ -579,23 +777,18 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
return (NULL);
}
+ /*
+ * Make sure this property is valid and applies to this type.
+ */
+
elem = NULL;
while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
const char *propname = nvpair_name(elem);
- /*
- * Make sure this property is valid and applies to this type.
- */
- if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
- if (!zfs_prop_user(propname)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid property '%s'"), propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
-
+ prop = zfs_name_to_prop(propname);
+ if (prop == ZPROP_INVAL && zfs_prop_user(propname)) {
/*
- * If this is a user property, make sure it's a
+ * This is a user property: make sure it's a
* string, and that it's less than ZAP_MAXNAMELEN.
*/
if (nvpair_type(elem) != DATA_TYPE_STRING) {
@@ -621,6 +814,10 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
continue;
}
+ /*
+ * Currently, only user properties can be modified on
+ * snapshots.
+ */
if (type == ZFS_TYPE_SNAPSHOT) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"this property can not be modified for snapshots"));
@@ -628,6 +825,80 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
goto error;
}
+ if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) {
+ zfs_userquota_prop_t uqtype;
+ char newpropname[128];
+ char domain[128];
+ uint64_t rid;
+ uint64_t valary[3];
+
+ if (userquota_propname_decode(propname, zoned,
+ &uqtype, domain, sizeof (domain), &rid) != 0) {
+ zfs_error_aux(hdl,
+ dgettext(TEXT_DOMAIN,
+ "'%s' has an invalid user/group name"),
+ propname);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto error;
+ }
+
+ if (uqtype != ZFS_PROP_USERQUOTA &&
+ uqtype != ZFS_PROP_GROUPQUOTA) {
+ zfs_error_aux(hdl,
+ dgettext(TEXT_DOMAIN, "'%s' is readonly"),
+ propname);
+ (void) zfs_error(hdl, EZFS_PROPREADONLY,
+ errbuf);
+ goto error;
+ }
+
+ if (nvpair_type(elem) == DATA_TYPE_STRING) {
+ (void) nvpair_value_string(elem, &strval);
+ if (strcmp(strval, "none") == 0) {
+ intval = 0;
+ } else if (zfs_nicestrtonum(hdl,
+ strval, &intval) != 0) {
+ (void) zfs_error(hdl,
+ EZFS_BADPROP, errbuf);
+ goto error;
+ }
+ } else if (nvpair_type(elem) ==
+ DATA_TYPE_UINT64) {
+ (void) nvpair_value_uint64(elem, &intval);
+ if (intval == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "use 'none' to disable "
+ "userquota/groupquota"));
+ goto error;
+ }
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "'%s' must be a number"), propname);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto error;
+ }
+
+ (void) snprintf(newpropname, sizeof (newpropname),
+ "%s%s", zfs_userquota_prop_prefixes[uqtype],
+ domain);
+ valary[0] = uqtype;
+ valary[1] = rid;
+ valary[2] = intval;
+ if (nvlist_add_uint64_array(ret, newpropname,
+ valary, 3) != 0) {
+ (void) no_memory(hdl);
+ goto error;
+ }
+ continue;
+ }
+
+ if (prop == ZPROP_INVAL) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid property '%s'"), propname);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto error;
+ }
+
if (!zfs_prop_valid_for_type(prop, type)) {
zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "'%s' does not "
@@ -767,7 +1038,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
} else if (getzoneid() != GLOBAL_ZONEID) {
/*
* If zoned property is 'off', this must be in
- * a globle zone. If not, something is wrong.
+ * a global zone. If not, something is wrong.
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' cannot be set while dataset "
@@ -951,808 +1222,6 @@ error:
return (NULL);
}
-static int
-zfs_get_perm_who(const char *who, zfs_deleg_who_type_t *who_type,
- uint64_t *ret_who)
-{
- struct passwd *pwd;
- struct group *grp;
- uid_t id;
-
- if (*who_type == ZFS_DELEG_EVERYONE || *who_type == ZFS_DELEG_CREATE ||
- *who_type == ZFS_DELEG_NAMED_SET) {
- *ret_who = -1;
- return (0);
- }
- if (who == NULL && !(*who_type == ZFS_DELEG_EVERYONE))
- return (EZFS_BADWHO);
-
- if (*who_type == ZFS_DELEG_WHO_UNKNOWN &&
- strcmp(who, "everyone") == 0) {
- *ret_who = -1;
- *who_type = ZFS_DELEG_EVERYONE;
- return (0);
- }
-
- pwd = getpwnam(who);
- grp = getgrnam(who);
-
- if ((*who_type == ZFS_DELEG_USER) && pwd) {
- *ret_who = pwd->pw_uid;
- } else if ((*who_type == ZFS_DELEG_GROUP) && grp) {
- *ret_who = grp->gr_gid;
- } else if (pwd) {
- *ret_who = pwd->pw_uid;
- *who_type = ZFS_DELEG_USER;
- } else if (grp) {
- *ret_who = grp->gr_gid;
- *who_type = ZFS_DELEG_GROUP;
- } else {
- char *end;
-
- id = strtol(who, &end, 10);
- if (errno != 0 || *end != '\0') {
- return (EZFS_BADWHO);
- } else {
- *ret_who = id;
- if (*who_type == ZFS_DELEG_WHO_UNKNOWN)
- *who_type = ZFS_DELEG_USER;
- }
- }
-
- return (0);
-}
-
-static void
-zfs_perms_add_to_nvlist(nvlist_t *who_nvp, char *name, nvlist_t *perms_nvp)
-{
- if (perms_nvp != NULL) {
- verify(nvlist_add_nvlist(who_nvp,
- name, perms_nvp) == 0);
- } else {
- verify(nvlist_add_boolean(who_nvp, name) == 0);
- }
-}
-
-static void
-helper(zfs_deleg_who_type_t who_type, uint64_t whoid, char *whostr,
- zfs_deleg_inherit_t inherit, nvlist_t *who_nvp, nvlist_t *perms_nvp,
- nvlist_t *sets_nvp)
-{
- boolean_t do_perms, do_sets;
- char name[ZFS_MAX_DELEG_NAME];
-
- do_perms = (nvlist_next_nvpair(perms_nvp, NULL) != NULL);
- do_sets = (nvlist_next_nvpair(sets_nvp, NULL) != NULL);
-
- if (!do_perms && !do_sets)
- do_perms = do_sets = B_TRUE;
-
- if (do_perms) {
- zfs_deleg_whokey(name, who_type, inherit,
- (who_type == ZFS_DELEG_NAMED_SET) ?
- whostr : (void *)&whoid);
- zfs_perms_add_to_nvlist(who_nvp, name, perms_nvp);
- }
- if (do_sets) {
- zfs_deleg_whokey(name, toupper(who_type), inherit,
- (who_type == ZFS_DELEG_NAMED_SET) ?
- whostr : (void *)&whoid);
- zfs_perms_add_to_nvlist(who_nvp, name, sets_nvp);
- }
-}
-
-static void
-zfs_perms_add_who_nvlist(nvlist_t *who_nvp, uint64_t whoid, void *whostr,
- nvlist_t *perms_nvp, nvlist_t *sets_nvp,
- zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit)
-{
- if (who_type == ZFS_DELEG_NAMED_SET || who_type == ZFS_DELEG_CREATE) {
- helper(who_type, whoid, whostr, 0,
- who_nvp, perms_nvp, sets_nvp);
- } else {
- if (inherit & ZFS_DELEG_PERM_LOCAL) {
- helper(who_type, whoid, whostr, ZFS_DELEG_LOCAL,
- who_nvp, perms_nvp, sets_nvp);
- }
- if (inherit & ZFS_DELEG_PERM_DESCENDENT) {
- helper(who_type, whoid, whostr, ZFS_DELEG_DESCENDENT,
- who_nvp, perms_nvp, sets_nvp);
- }
- }
-}
-
-/*
- * Construct nvlist to pass down to kernel for setting/removing permissions.
- *
- * The nvlist is constructed as a series of nvpairs with an optional embedded
- * nvlist of permissions to remove or set. The topmost nvpairs are the actual
- * base attribute named stored in the dsl.
- * Arguments:
- *
- * whostr: is a comma separated list of users, groups, or a single set name.
- * whostr may be null for everyone or create perms.
- * who_type: is the type of entry in whostr. Typically this will be
- * ZFS_DELEG_WHO_UNKNOWN.
- * perms: common separated list of permissions. May be null if user
- * is requested to remove permissions by who.
- * inherit: Specifies the inheritance of the permissions. Will be either
- * ZFS_DELEG_PERM_LOCAL and/or ZFS_DELEG_PERM_DESCENDENT.
- * nvp The constructed nvlist to pass to zfs_perm_set().
- * The output nvp will look something like this.
- * ul$1234 -> {create ; destroy }
- * Ul$1234 -> { @myset }
- * s-$@myset - { snapshot; checksum; compression }
- */
-int
-zfs_build_perms(zfs_handle_t *zhp, char *whostr, char *perms,
- zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit, nvlist_t **nvp)
-{
- nvlist_t *who_nvp;
- nvlist_t *perms_nvp = NULL;
- nvlist_t *sets_nvp = NULL;
- char errbuf[1024];
- char *who_tok, *perm;
- int error;
-
- *nvp = NULL;
-
- if (perms) {
- if ((error = nvlist_alloc(&perms_nvp,
- NV_UNIQUE_NAME, 0)) != 0) {
- return (1);
- }
- if ((error = nvlist_alloc(&sets_nvp,
- NV_UNIQUE_NAME, 0)) != 0) {
- nvlist_free(perms_nvp);
- return (1);
- }
- }
-
- if ((error = nvlist_alloc(&who_nvp, NV_UNIQUE_NAME, 0)) != 0) {
- if (perms_nvp)
- nvlist_free(perms_nvp);
- if (sets_nvp)
- nvlist_free(sets_nvp);
- return (1);
- }
-
- if (who_type == ZFS_DELEG_NAMED_SET) {
- namecheck_err_t why;
- char what;
-
- if ((error = permset_namecheck(whostr, &why, &what)) != 0) {
- nvlist_free(who_nvp);
- if (perms_nvp)
- nvlist_free(perms_nvp);
- if (sets_nvp)
- nvlist_free(sets_nvp);
-
- switch (why) {
- case NAME_ERR_NO_AT:
- zfs_error_aux(zhp->zfs_hdl,
- dgettext(TEXT_DOMAIN,
- "set definition must begin with an '@' "
- "character"));
- }
- return (zfs_error(zhp->zfs_hdl,
- EZFS_BADPERMSET, whostr));
- }
- }
-
- /*
- * Build up nvlist(s) of permissions. Two nvlists are maintained.
- * The first nvlist perms_nvp will have normal permissions and the
- * other sets_nvp will have only permssion set names in it.
- */
- for (perm = strtok(perms, ","); perm; perm = strtok(NULL, ",")) {
- const char *perm_canonical = zfs_deleg_canonicalize_perm(perm);
-
- if (perm_canonical) {
- verify(nvlist_add_boolean(perms_nvp,
- perm_canonical) == 0);
- } else if (perm[0] == '@') {
- verify(nvlist_add_boolean(sets_nvp, perm) == 0);
- } else {
- nvlist_free(who_nvp);
- nvlist_free(perms_nvp);
- nvlist_free(sets_nvp);
- return (zfs_error(zhp->zfs_hdl, EZFS_BADPERM, perm));
- }
- }
-
- if (whostr && who_type != ZFS_DELEG_CREATE) {
- who_tok = strtok(whostr, ",");
- if (who_tok == NULL) {
- nvlist_free(who_nvp);
- if (perms_nvp)
- nvlist_free(perms_nvp);
- if (sets_nvp)
- nvlist_free(sets_nvp);
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "Who string is NULL"),
- whostr);
- return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
- }
- }
-
- /*
- * Now create the nvlist(s)
- */
- do {
- uint64_t who_id;
-
- error = zfs_get_perm_who(who_tok, &who_type,
- &who_id);
- if (error) {
- nvlist_free(who_nvp);
- if (perms_nvp)
- nvlist_free(perms_nvp);
- if (sets_nvp)
- nvlist_free(sets_nvp);
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN,
- "Unable to determine uid/gid for "
- "%s "), who_tok);
- return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
- }
-
- /*
- * add entries for both local and descendent when required
- */
- zfs_perms_add_who_nvlist(who_nvp, who_id, who_tok,
- perms_nvp, sets_nvp, who_type, inherit);
-
- } while (who_tok = strtok(NULL, ","));
- *nvp = who_nvp;
- return (0);
-}
-
-static int
-zfs_perm_set_common(zfs_handle_t *zhp, nvlist_t *nvp, boolean_t unset)
-{
- zfs_cmd_t zc = { 0 };
- int error;
- char errbuf[1024];
-
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "Cannot update 'allows' for '%s'"),
- zhp->zfs_name);
-
- if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, nvp))
- return (-1);
-
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- zc.zc_perm_action = unset;
-
- error = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SET_FSACL, &zc);
- if (error && errno == ENOTSUP) {
- (void) snprintf(errbuf, sizeof (errbuf),
- gettext("Pool must be upgraded to use 'allow/unallow'"));
- zcmd_free_nvlists(&zc);
- return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION, errbuf));
- } else if (error) {
- return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf));
- }
- zcmd_free_nvlists(&zc);
-
- return (error);
-}
-
-int
-zfs_perm_set(zfs_handle_t *zhp, nvlist_t *nvp)
-{
- return (zfs_perm_set_common(zhp, nvp, B_FALSE));
-}
-
-int
-zfs_perm_remove(zfs_handle_t *zhp, nvlist_t *perms)
-{
- return (zfs_perm_set_common(zhp, perms, B_TRUE));
-}
-
-static int
-perm_compare(const void *arg1, const void *arg2)
-{
- const zfs_perm_node_t *node1 = arg1;
- const zfs_perm_node_t *node2 = arg2;
- int ret;
-
- ret = strcmp(node1->z_pname, node2->z_pname);
-
- if (ret > 0)
- return (1);
- if (ret < 0)
- return (-1);
- else
- return (0);
-}
-
-static void
-zfs_destroy_perm_tree(avl_tree_t *tree)
-{
- zfs_perm_node_t *permnode;
- void *cookie = NULL;
-
- while ((permnode = avl_destroy_nodes(tree, &cookie)) != NULL)
- free(permnode);
- avl_destroy(tree);
-}
-
-static void
-zfs_destroy_tree(avl_tree_t *tree)
-{
- zfs_allow_node_t *allownode;
- void *cookie = NULL;
-
- while ((allownode = avl_destroy_nodes(tree, &cookie)) != NULL) {
- zfs_destroy_perm_tree(&allownode->z_localdescend);
- zfs_destroy_perm_tree(&allownode->z_local);
- zfs_destroy_perm_tree(&allownode->z_descend);
- free(allownode);
- }
- avl_destroy(tree);
-}
-
-void
-zfs_free_allows(zfs_allow_t *allow)
-{
- zfs_allow_t *allownext;
- zfs_allow_t *freeallow;
-
- allownext = allow;
- while (allownext) {
- zfs_destroy_tree(&allownext->z_sets);
- zfs_destroy_tree(&allownext->z_crperms);
- zfs_destroy_tree(&allownext->z_user);
- zfs_destroy_tree(&allownext->z_group);
- zfs_destroy_tree(&allownext->z_everyone);
- freeallow = allownext;
- allownext = allownext->z_next;
- free(freeallow);
- }
-}
-
-static zfs_allow_t *
-zfs_alloc_perm_tree(zfs_handle_t *zhp, zfs_allow_t *prev, char *setpoint)
-{
- zfs_allow_t *ptree;
-
- if ((ptree = zfs_alloc(zhp->zfs_hdl,
- sizeof (zfs_allow_t))) == NULL) {
- return (NULL);
- }
-
- (void) strlcpy(ptree->z_setpoint, setpoint, sizeof (ptree->z_setpoint));
- avl_create(&ptree->z_sets,
- perm_compare, sizeof (zfs_allow_node_t),
- offsetof(zfs_allow_node_t, z_node));
- avl_create(&ptree->z_crperms,
- perm_compare, sizeof (zfs_allow_node_t),
- offsetof(zfs_allow_node_t, z_node));
- avl_create(&ptree->z_user,
- perm_compare, sizeof (zfs_allow_node_t),
- offsetof(zfs_allow_node_t, z_node));
- avl_create(&ptree->z_group,
- perm_compare, sizeof (zfs_allow_node_t),
- offsetof(zfs_allow_node_t, z_node));
- avl_create(&ptree->z_everyone,
- perm_compare, sizeof (zfs_allow_node_t),
- offsetof(zfs_allow_node_t, z_node));
-
- if (prev)
- prev->z_next = ptree;
- ptree->z_next = NULL;
- return (ptree);
-}
-
-/*
- * Add permissions to the appropriate AVL permission tree.
- * The appropriate tree may not be the requested tree.
- * For example if ld indicates a local permission, but
- * same permission also exists as a descendent permission
- * then the permission will be removed from the descendent
- * tree and add the the local+descendent tree.
- */
-static int
-zfs_coalesce_perm(zfs_handle_t *zhp, zfs_allow_node_t *allownode,
- char *perm, char ld)
-{
- zfs_perm_node_t pnode, *permnode, *permnode2;
- zfs_perm_node_t *newnode;
- avl_index_t where, where2;
- avl_tree_t *tree, *altree;
-
- (void) strlcpy(pnode.z_pname, perm, sizeof (pnode.z_pname));
-
- if (ld == ZFS_DELEG_NA) {
- tree = &allownode->z_localdescend;
- altree = &allownode->z_descend;
- } else if (ld == ZFS_DELEG_LOCAL) {
- tree = &allownode->z_local;
- altree = &allownode->z_descend;
- } else {
- tree = &allownode->z_descend;
- altree = &allownode->z_local;
- }
- permnode = avl_find(tree, &pnode, &where);
- permnode2 = avl_find(altree, &pnode, &where2);
-
- if (permnode2) {
- avl_remove(altree, permnode2);
- free(permnode2);
- if (permnode == NULL) {
- tree = &allownode->z_localdescend;
- }
- }
-
- /*
- * Now insert new permission in either requested location
- * local/descendent or into ld when perm will exist in both.
- */
- if (permnode == NULL) {
- if ((newnode = zfs_alloc(zhp->zfs_hdl,
- sizeof (zfs_perm_node_t))) == NULL) {
- return (-1);
- }
- *newnode = pnode;
- avl_add(tree, newnode);
- }
- return (0);
-}
-
-/*
- * Uggh, this is going to be a bit complicated.
- * we have an nvlist coming out of the kernel that
- * will indicate where the permission is set and then
- * it will contain allow of the various "who's", and what
- * their permissions are. To further complicate this
- * we will then have to coalesce the local,descendent
- * and local+descendent permissions where appropriate.
- * The kernel only knows about a permission as being local
- * or descendent, but not both.
- *
- * In order to make this easier for zfs_main to deal with
- * a series of AVL trees will be used to maintain
- * all of this, primarily for sorting purposes as well
- * as the ability to quickly locate a specific entry.
- *
- * What we end up with are tree's for sets, create perms,
- * user, groups and everyone. With each of those trees
- * we have subtrees for local, descendent and local+descendent
- * permissions.
- */
-int
-zfs_perm_get(zfs_handle_t *zhp, zfs_allow_t **zfs_perms)
-{
- zfs_cmd_t zc = { 0 };
- int error;
- nvlist_t *nvlist;
- nvlist_t *permnv, *sourcenv;
- nvpair_t *who_pair, *source_pair;
- nvpair_t *perm_pair;
- char errbuf[1024];
- zfs_allow_t *zallowp, *newallowp;
- char ld;
- char *nvpname;
- uid_t uid;
- gid_t gid;
- avl_tree_t *tree;
- avl_index_t where;
-
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
- if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
- return (-1);
-
- while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
- if (errno == ENOMEM) {
- if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- } else if (errno == ENOTSUP) {
- zcmd_free_nvlists(&zc);
- (void) snprintf(errbuf, sizeof (errbuf),
- gettext("Pool must be upgraded to use 'allow'"));
- return (zfs_error(zhp->zfs_hdl,
- EZFS_BADVERSION, errbuf));
- } else {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- }
-
- if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &nvlist) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
-
- zcmd_free_nvlists(&zc);
-
- source_pair = nvlist_next_nvpair(nvlist, NULL);
-
- if (source_pair == NULL) {
- *zfs_perms = NULL;
- return (0);
- }
-
- *zfs_perms = zfs_alloc_perm_tree(zhp, NULL, nvpair_name(source_pair));
- if (*zfs_perms == NULL) {
- return (0);
- }
-
- zallowp = *zfs_perms;
-
- for (;;) {
- struct passwd *pwd;
- struct group *grp;
- zfs_allow_node_t *allownode;
- zfs_allow_node_t findallownode;
- zfs_allow_node_t *newallownode;
-
- (void) strlcpy(zallowp->z_setpoint,
- nvpair_name(source_pair),
- sizeof (zallowp->z_setpoint));
-
- if ((error = nvpair_value_nvlist(source_pair, &sourcenv)) != 0)
- goto abort;
-
- /*
- * Make sure nvlist is composed correctly
- */
- if (zfs_deleg_verify_nvlist(sourcenv)) {
- goto abort;
- }
-
- who_pair = nvlist_next_nvpair(sourcenv, NULL);
- if (who_pair == NULL) {
- goto abort;
- }
-
- do {
- error = nvpair_value_nvlist(who_pair, &permnv);
- if (error) {
- goto abort;
- }
-
- /*
- * First build up the key to use
- * for looking up in the various
- * who trees.
- */
- ld = nvpair_name(who_pair)[1];
- nvpname = nvpair_name(who_pair);
- switch (nvpair_name(who_pair)[0]) {
- case ZFS_DELEG_USER:
- case ZFS_DELEG_USER_SETS:
- tree = &zallowp->z_user;
- uid = atol(&nvpname[3]);
- pwd = getpwuid(uid);
- (void) snprintf(findallownode.z_key,
- sizeof (findallownode.z_key), "user %s",
- (pwd) ? pwd->pw_name :
- &nvpair_name(who_pair)[3]);
- break;
- case ZFS_DELEG_GROUP:
- case ZFS_DELEG_GROUP_SETS:
- tree = &zallowp->z_group;
- gid = atol(&nvpname[3]);
- grp = getgrgid(gid);
- (void) snprintf(findallownode.z_key,
- sizeof (findallownode.z_key), "group %s",
- (grp) ? grp->gr_name :
- &nvpair_name(who_pair)[3]);
- break;
- case ZFS_DELEG_CREATE:
- case ZFS_DELEG_CREATE_SETS:
- tree = &zallowp->z_crperms;
- (void) strlcpy(findallownode.z_key, "",
- sizeof (findallownode.z_key));
- break;
- case ZFS_DELEG_EVERYONE:
- case ZFS_DELEG_EVERYONE_SETS:
- (void) snprintf(findallownode.z_key,
- sizeof (findallownode.z_key), "everyone");
- tree = &zallowp->z_everyone;
- break;
- case ZFS_DELEG_NAMED_SET:
- case ZFS_DELEG_NAMED_SET_SETS:
- (void) snprintf(findallownode.z_key,
- sizeof (findallownode.z_key), "%s",
- &nvpair_name(who_pair)[3]);
- tree = &zallowp->z_sets;
- break;
- }
-
- /*
- * Place who in tree
- */
- allownode = avl_find(tree, &findallownode, &where);
- if (allownode == NULL) {
- if ((newallownode = zfs_alloc(zhp->zfs_hdl,
- sizeof (zfs_allow_node_t))) == NULL) {
- goto abort;
- }
- avl_create(&newallownode->z_localdescend,
- perm_compare,
- sizeof (zfs_perm_node_t),
- offsetof(zfs_perm_node_t, z_node));
- avl_create(&newallownode->z_local,
- perm_compare,
- sizeof (zfs_perm_node_t),
- offsetof(zfs_perm_node_t, z_node));
- avl_create(&newallownode->z_descend,
- perm_compare,
- sizeof (zfs_perm_node_t),
- offsetof(zfs_perm_node_t, z_node));
- (void) strlcpy(newallownode->z_key,
- findallownode.z_key,
- sizeof (findallownode.z_key));
- avl_insert(tree, newallownode, where);
- allownode = newallownode;
- }
-
- /*
- * Now iterate over the permissions and
- * place them in the appropriate local,
- * descendent or local+descendent tree.
- *
- * The permissions are added to the tree
- * via zfs_coalesce_perm().
- */
- perm_pair = nvlist_next_nvpair(permnv, NULL);
- if (perm_pair == NULL)
- goto abort;
- do {
- if (zfs_coalesce_perm(zhp, allownode,
- nvpair_name(perm_pair), ld) != 0)
- goto abort;
- } while (perm_pair = nvlist_next_nvpair(permnv,
- perm_pair));
- } while (who_pair = nvlist_next_nvpair(sourcenv, who_pair));
-
- source_pair = nvlist_next_nvpair(nvlist, source_pair);
- if (source_pair == NULL)
- break;
-
- /*
- * allocate another node from the link list of
- * zfs_allow_t structures
- */
- newallowp = zfs_alloc_perm_tree(zhp, zallowp,
- nvpair_name(source_pair));
- if (newallowp == NULL) {
- goto abort;
- }
- zallowp = newallowp;
- }
- nvlist_free(nvlist);
- return (0);
-abort:
- zfs_free_allows(*zfs_perms);
- nvlist_free(nvlist);
- return (-1);
-}
-
-static char *
-zfs_deleg_perm_note(zfs_deleg_note_t note)
-{
- /*
- * Don't put newlines on end of lines
- */
- switch (note) {
- case ZFS_DELEG_NOTE_CREATE:
- return (dgettext(TEXT_DOMAIN,
- "Must also have the 'mount' ability"));
- case ZFS_DELEG_NOTE_DESTROY:
- return (dgettext(TEXT_DOMAIN,
- "Must also have the 'mount' ability"));
- case ZFS_DELEG_NOTE_SNAPSHOT:
- return (dgettext(TEXT_DOMAIN,
- "Must also have the 'mount' ability"));
- case ZFS_DELEG_NOTE_ROLLBACK:
- return (dgettext(TEXT_DOMAIN,
- "Must also have the 'mount' ability"));
- case ZFS_DELEG_NOTE_CLONE:
- return (dgettext(TEXT_DOMAIN, "Must also have the 'create' "
- "ability and 'mount'\n"
- "\t\t\t\tability in the origin file system"));
- case ZFS_DELEG_NOTE_PROMOTE:
- return (dgettext(TEXT_DOMAIN, "Must also have the 'mount'\n"
- "\t\t\t\tand 'promote' ability in the origin file system"));
- case ZFS_DELEG_NOTE_RENAME:
- return (dgettext(TEXT_DOMAIN, "Must also have the 'mount' "
- "and 'create' \n\t\t\t\tability in the new parent"));
- case ZFS_DELEG_NOTE_RECEIVE:
- return (dgettext(TEXT_DOMAIN, "Must also have the 'mount'"
- " and 'create' ability"));
- case ZFS_DELEG_NOTE_USERPROP:
- return (dgettext(TEXT_DOMAIN,
- "Allows changing any user property"));
- case ZFS_DELEG_NOTE_ALLOW:
- return (dgettext(TEXT_DOMAIN,
- "Must also have the permission that is being\n"
- "\t\t\t\tallowed"));
- case ZFS_DELEG_NOTE_MOUNT:
- return (dgettext(TEXT_DOMAIN,
- "Allows mount/umount of ZFS datasets"));
- case ZFS_DELEG_NOTE_SHARE:
- return (dgettext(TEXT_DOMAIN,
- "Allows sharing file systems over NFS or SMB\n"
- "\t\t\t\tprotocols"));
- case ZFS_DELEG_NOTE_NONE:
- default:
- return (dgettext(TEXT_DOMAIN, ""));
- }
-}
-
-typedef enum {
- ZFS_DELEG_SUBCOMMAND,
- ZFS_DELEG_PROP,
- ZFS_DELEG_OTHER
-} zfs_deleg_perm_type_t;
-
-/*
- * is the permission a subcommand or other?
- */
-zfs_deleg_perm_type_t
-zfs_deleg_perm_type(const char *perm)
-{
- if (strcmp(perm, "userprop") == 0)
- return (ZFS_DELEG_OTHER);
- else
- return (ZFS_DELEG_SUBCOMMAND);
-}
-
-static char *
-zfs_deleg_perm_type_str(zfs_deleg_perm_type_t type)
-{
- switch (type) {
- case ZFS_DELEG_SUBCOMMAND:
- return (dgettext(TEXT_DOMAIN, "subcommand"));
- case ZFS_DELEG_PROP:
- return (dgettext(TEXT_DOMAIN, "property"));
- case ZFS_DELEG_OTHER:
- return (dgettext(TEXT_DOMAIN, "other"));
- }
- return ("");
-}
-
-/*ARGSUSED*/
-static int
-zfs_deleg_prop_cb(int prop, void *cb)
-{
- if (zfs_prop_delegatable(prop))
- (void) fprintf(stderr, "%-15s %-15s\n", zfs_prop_to_name(prop),
- zfs_deleg_perm_type_str(ZFS_DELEG_PROP));
-
- return (ZPROP_CONT);
-}
-
-void
-zfs_deleg_permissions(void)
-{
- int i;
-
- (void) fprintf(stderr, "\n%-15s %-15s\t%s\n\n", "NAME",
- "TYPE", "NOTES");
-
- /*
- * First print out the subcommands
- */
- for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) {
- (void) fprintf(stderr, "%-15s %-15s\t%s\n",
- zfs_deleg_perm_tab[i].z_perm,
- zfs_deleg_perm_type_str(
- zfs_deleg_perm_type(zfs_deleg_perm_tab[i].z_perm)),
- zfs_deleg_perm_note(zfs_deleg_perm_tab[i].z_note));
- }
-
- (void) zprop_iter(zfs_deleg_prop_cb, NULL, B_FALSE, B_TRUE,
- ZFS_TYPE_DATASET|ZFS_TYPE_VOLUME);
-}
-
/*
* Given a property name and value, set the property for the given dataset.
*/
@@ -1834,6 +1303,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
goto error;
ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
+
if (ret != 0) {
switch (errno) {
@@ -2140,15 +1610,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
*/
if (!zhp->zfs_mntcheck &&
(mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
- struct mnttab entry, search = { 0 };
- FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;
-
- search.mnt_special = (char *)zhp->zfs_name;
- search.mnt_fstype = MNTTYPE_ZFS;
- rewind(mnttab);
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ struct mnttab entry;
- if (getmntany(mnttab, &entry, &search) == 0) {
- zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
+ if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) {
+ zhp->zfs_mntopts = zfs_strdup(hdl,
entry.mnt_mntopts);
if (zhp->zfs_mntopts == NULL)
return (-1);
@@ -2247,7 +1713,7 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
case PROP_TYPE_INDEX:
*val = getprop_uint64(zhp, prop, source);
/*
- * If we tried to use a defalut value for a
+ * If we tried to use a default value for a
* readonly property, it means that it was not
* present; return an error.
*/
@@ -2541,7 +2007,7 @@ zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val)
{
char buf[64];
- zfs_nicenum(val, buf, sizeof (buf));
+ (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val);
return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf));
}
@@ -2574,6 +2040,247 @@ zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
return (0);
}
+static int
+idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser,
+ char **domainp, idmap_rid_t *ridp)
+{
+#ifdef sun
+ idmap_handle_t *idmap_hdl = NULL;
+ idmap_get_handle_t *get_hdl = NULL;
+ idmap_stat status;
+ int err = EINVAL;
+
+ if (idmap_init(&idmap_hdl) != IDMAP_SUCCESS)
+ goto out;
+ if (idmap_get_create(idmap_hdl, &get_hdl) != IDMAP_SUCCESS)
+ goto out;
+
+ if (isuser) {
+ err = idmap_get_sidbyuid(get_hdl, id,
+ IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status);
+ } else {
+ err = idmap_get_sidbygid(get_hdl, id,
+ IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status);
+ }
+ if (err == IDMAP_SUCCESS &&
+ idmap_get_mappings(get_hdl) == IDMAP_SUCCESS &&
+ status == IDMAP_SUCCESS)
+ err = 0;
+ else
+ err = EINVAL;
+out:
+ if (get_hdl)
+ idmap_get_destroy(get_hdl);
+ if (idmap_hdl)
+ (void) idmap_fini(idmap_hdl);
+ return (err);
+#else /* !sun */
+ assert(!"invalid code path");
+#endif /* !sun */
+}
+
+#ifndef sun
+/* Check if a string contains only digits */
+static int
+string_is_digits(char *cp)
+{
+ int i;
+
+ for(i = 0; i < strlen(cp); i++)
+ if(!isdigit(cp[i]))
+ return (0);
+ return (1);
+}
+
+#endif /* !sun */
+
+/*
+ * Convert the propname into the parameters needed by the kernel.
+ * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829
+ * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789
+ */
+static int
+userquota_propname_decode(const char *propname, boolean_t zoned,
+ zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp)
+{
+ zfs_userquota_prop_t type;
+ char *cp, *end;
+ char *numericsid = NULL;
+ boolean_t isuser;
+
+ domain[0] = '\0';
+
+ /* Figure out the property type ({user|group}{quota|space}) */
+ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) {
+ if (strncmp(propname, zfs_userquota_prop_prefixes[type],
+ strlen(zfs_userquota_prop_prefixes[type])) == 0)
+ break;
+ }
+ if (type == ZFS_NUM_USERQUOTA_PROPS)
+ return (EINVAL);
+ *typep = type;
+
+ isuser = (type == ZFS_PROP_USERQUOTA ||
+ type == ZFS_PROP_USERUSED);
+
+ cp = strchr(propname, '@') + 1;
+
+ if (strchr(cp, '@')) {
+#ifdef sun
+ /*
+ * It's a SID name (eg "user@domain") that needs to be
+ * turned into S-1-domainID-RID.
+ */
+ directory_error_t e;
+
+ if (zoned && getzoneid() == GLOBAL_ZONEID)
+ return (ENOENT);
+ if (isuser) {
+ e = directory_sid_from_user_name(NULL,
+ cp, &numericsid);
+ } else {
+ e = directory_sid_from_group_name(NULL,
+ cp, &numericsid);
+ }
+ if (e != NULL) {
+ directory_error_free(e);
+ return (ENOENT);
+ }
+ if (numericsid == NULL)
+ return (ENOENT);
+ cp = numericsid;
+ /* will be further decoded below */
+#else /* !sun */
+ return (ENOENT);
+#endif /* !sun */
+ }
+
+ if (strncmp(cp, "S-1-", 4) == 0) {
+ /* It's a numeric SID (eg "S-1-234-567-89") */
+ (void) strlcpy(domain, cp, domainlen);
+ cp = strrchr(domain, '-');
+ *cp = '\0';
+ cp++;
+
+ errno = 0;
+ *ridp = strtoull(cp, &end, 10);
+ if (numericsid) {
+ free(numericsid);
+ numericsid = NULL;
+ }
+ if (errno != 0 || *end != '\0')
+ return (EINVAL);
+#ifdef sun
+ } else if (!isdigit(*cp)) {
+#else /* sun */
+ /*
+ * In FreeBSD, user and group names can begin with a digit, so only
+ * treat the string as a uid/gid if it consists entirely of digits.
+ */
+ } else if (!string_is_digits(cp)) {
+#endif /* sun */
+ /*
+ * It's a user/group name (eg "user") that needs to be
+ * turned into a uid/gid
+ */
+ if (zoned && getzoneid() == GLOBAL_ZONEID)
+ return (ENOENT);
+ if (isuser) {
+ struct passwd *pw;
+ pw = getpwnam(cp);
+ if (pw == NULL)
+ return (ENOENT);
+ *ridp = pw->pw_uid;
+ } else {
+ struct group *gr;
+ gr = getgrnam(cp);
+ if (gr == NULL)
+ return (ENOENT);
+ *ridp = gr->gr_gid;
+ }
+ } else {
+ /* It's a user/group ID (eg "12345"). */
+ uid_t id = strtoul(cp, &end, 10);
+ idmap_rid_t rid;
+ char *mapdomain;
+
+ if (*end != '\0')
+ return (EINVAL);
+ if (id > MAXUID) {
+ /* It's an ephemeral ID. */
+ if (idmap_id_to_numeric_domain_rid(id, isuser,
+ &mapdomain, &rid) != 0)
+ return (ENOENT);
+ (void) strlcpy(domain, mapdomain, domainlen);
+ *ridp = rid;
+ } else {
+ *ridp = id;
+ }
+ }
+
+ ASSERT3P(numericsid, ==, NULL);
+ return (0);
+}
+
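/*
 * A minimal sketch of how the decoder above behaves, using made-up names
 * and ids purely for illustration:
 *
 *	zfs_userquota_prop_t type;
 *	char domain[128];
 *	uint64_t rid;
 *
 *	userquota_propname_decode("groupquota@S-1-123-456-789", B_FALSE,
 *	    &type, domain, sizeof (domain), &rid);
 *	-> type = ZFS_PROP_GROUPQUOTA, domain = "S-1-123-456", rid = 789
 *
 *	userquota_propname_decode("userused@1001", B_FALSE,
 *	    &type, domain, sizeof (domain), &rid);
 *	-> type = ZFS_PROP_USERUSED, domain = "", rid = 1001
 *	   (assuming 1001 is not above MAXUID, i.e. not an ephemeral id)
 */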
+static int
+zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname,
+ uint64_t *propvalue, zfs_userquota_prop_t *typep)
+{
+ int err;
+ zfs_cmd_t zc = { 0 };
+
+ (void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+ err = userquota_propname_decode(propname,
+ zfs_prop_get_int(zhp, ZFS_PROP_ZONED),
+ typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid);
+ zc.zc_objset_type = *typep;
+ if (err)
+ return (err);
+
+ err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc);
+ if (err)
+ return (err);
+
+ *propvalue = zc.zc_cookie;
+ return (0);
+}
+
+int
+zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
+ uint64_t *propvalue)
+{
+ zfs_userquota_prop_t type;
+
+ return (zfs_prop_get_userquota_common(zhp, propname, propvalue,
+ &type));
+}
+
+int
+zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
+ char *propbuf, int proplen, boolean_t literal)
+{
+ int err;
+ uint64_t propvalue;
+ zfs_userquota_prop_t type;
+
+ err = zfs_prop_get_userquota_common(zhp, propname, &propvalue,
+ &type);
+
+ if (err)
+ return (err);
+
+ if (literal) {
+ (void) snprintf(propbuf, proplen, "%llu", propvalue);
+ } else if (propvalue == 0 &&
+ (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) {
+ (void) strlcpy(propbuf, "none", proplen);
+ } else {
+ zfs_nicenum(propvalue, propbuf, proplen);
+ }
+ return (0);
+}
+
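/*
 * Hypothetical usage of the two getters above (names and buffer sizes are
 * illustrative only): zfs_prop_get_userquota_int() returns the raw byte
 * count, while zfs_prop_get_userquota() formats it.
 *
 *	char buf[32];
 *	uint64_t used;
 *
 *	if (zfs_prop_get_userquota_int(zhp, "userused@alice", &used) == 0)
 *		... used holds the space consumed, in bytes ...
 *	if (zfs_prop_get_userquota(zhp, "userquota@alice", buf,
 *	    sizeof (buf), B_FALSE) == 0)
 *		... buf holds "none" or a nicenum such as "1G" ...
 */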
/*
* Returns the name of the given zfs handle.
*/
@@ -2592,6 +2299,53 @@ zfs_get_type(const zfs_handle_t *zhp)
return (zhp->zfs_type);
}
+static int
+zfs_do_list_ioctl(zfs_handle_t *zhp, unsigned long arg, zfs_cmd_t *zc)
+{
+ int rc;
+ uint64_t orig_cookie;
+
+ orig_cookie = zc->zc_cookie;
+top:
+ (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
+ rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc);
+
+ /*
+ * FreeBSD compatibility with pre-v15 kernel module.
+ * Ignore private dataset names.
+ */
+ if (strchr(zc->zc_name, '$') != NULL)
+ rc = 0;
+
+ if (rc == -1) {
+ switch (errno) {
+ case ENOMEM:
+ /* expand nvlist memory and try again */
+ if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) {
+ zcmd_free_nvlists(zc);
+ return (-1);
+ }
+ zc->zc_cookie = orig_cookie;
+ goto top;
+ /*
+ * An errno value of ESRCH indicates normal completion.
+ * If ENOENT is returned, then the underlying dataset
+ * has been removed since we obtained the handle.
+ */
+ case ESRCH:
+ case ENOENT:
+ rc = 1;
+ break;
+ default:
+ rc = zfs_standard_error(zhp->zfs_hdl, errno,
+ dgettext(TEXT_DOMAIN,
+ "cannot iterate filesystems"));
+ break;
+ }
+ }
+ return (rc);
+}
+
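/*
 * Roughly, the iteration contract used by this helper: the caller seeds
 * zc_name with the parent dataset and leaves zc_cookie at 0; each
 * successful ioctl returns the next child in zc_name and advances the
 * cookie.  ENOMEM means the destination nvlist must be grown and the call
 * retried with the saved cookie, while ESRCH/ENOENT signal the end of the
 * iteration.  zfs_iter_filesystems() and zfs_iter_snapshots() below are
 * the two callers.
 */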
/*
* Iterate over all child filesystems
*/
@@ -2605,37 +2359,35 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM)
return (0);
- for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
+ if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+ return (-1);
+
+ while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT,
+ &zc)) == 0) {
+
/*
+ * FreeBSD compatibility with pre-v15 kernel module.
* Ignore private dataset names.
*/
- if (dataset_name_hidden(zc.zc_name))
+ if (strchr(zc.zc_name, '$') != NULL)
continue;
/*
* Silently ignore errors, as the only plausible explanation is
* that the pool has since been removed.
*/
- if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
- zc.zc_name)) == NULL)
+ if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl,
+ &zc)) == NULL) {
continue;
+ }
- if ((ret = func(nzhp, data)) != 0)
+ if ((ret = func(nzhp, data)) != 0) {
+ zcmd_free_nvlists(&zc);
return (ret);
+ }
}
-
- /*
- * An errno value of ESRCH indicates normal completion. If ENOENT is
- * returned, then the underlying dataset has been removed since we
- * obtained the handle.
- */
- if (errno != ESRCH && errno != ENOENT)
- return (zfs_standard_error(zhp->zfs_hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
-
- return (0);
+ zcmd_free_nvlists(&zc);
+ return ((ret < 0) ? ret : 0);
}
/*
@@ -2651,29 +2403,30 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
return (0);
- for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
- &zc) == 0;
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
+ if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+ return (-1);
+ while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+ &zc)) == 0) {
- if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
- zc.zc_name)) == NULL)
+ /*
+ * FreeBSD compatibility with pre-v15 kernel module.
+ * Ignore private dataset names.
+ */
+ if (strchr(zc.zc_name, '$') != NULL)
continue;
- if ((ret = func(nzhp, data)) != 0)
+ if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl,
+ &zc)) == NULL) {
+ continue;
+ }
+
+ if ((ret = func(nzhp, data)) != 0) {
+ zcmd_free_nvlists(&zc);
return (ret);
+ }
}
-
- /*
- * An errno value of ESRCH indicates normal completion. If ENOENT is
- * returned, then the underlying dataset has been removed since we
- * obtained the handle. Silently ignore this case, and return success.
- */
- if (errno != ESRCH && errno != ENOENT)
- return (zfs_standard_error(zhp->zfs_hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
-
- return (0);
+ zcmd_free_nvlists(&zc);
+ return ((ret < 0) ? ret : 0);
}
/*
@@ -2726,8 +2479,8 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
zfs_handle_t *zhp;
char errbuf[1024];
- (void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
- path);
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot create '%s'"), path);
/* get parent, and check to see if this is just a pool */
if (parent_name(path, parent, sizeof (parent)) != 0) {
@@ -4254,18 +4007,20 @@ zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
int
zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
- void *export, void *sharetab, int sharemax, zfs_share_op_t operation)
+ char *resource, void *export, void *sharetab,
+ int sharemax, zfs_share_op_t operation)
{
zfs_cmd_t zc = { 0 };
int error;
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
+ if (resource)
+ (void) strlcpy(zc.zc_string, resource, sizeof (zc.zc_string));
zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab;
zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export;
zc.zc_share.z_sharetype = operation;
zc.zc_share.z_sharemax = sharemax;
-
error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc);
return (error);
}
@@ -4299,6 +4054,126 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
}
}
+#ifdef sun
+static int
+zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path,
+ zfs_smb_acl_op_t cmd, char *resource1, char *resource2)
+{
+ zfs_cmd_t zc = { 0 };
+ nvlist_t *nvlist = NULL;
+ int error;
+
+ (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+ (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
+ zc.zc_cookie = (uint64_t)cmd;
+
+ if (cmd == ZFS_SMB_ACL_RENAME) {
+ if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) {
+ (void) no_memory(hdl);
+ return (NULL);
+ }
+ }
+
+ switch (cmd) {
+ case ZFS_SMB_ACL_ADD:
+ case ZFS_SMB_ACL_REMOVE:
+ (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string));
+ break;
+ case ZFS_SMB_ACL_RENAME:
+ if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC,
+ resource1) != 0) {
+ (void) no_memory(hdl);
+ return (-1);
+ }
+ if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET,
+ resource2) != 0) {
+ (void) no_memory(hdl);
+ return (-1);
+ }
+ if (zcmd_write_src_nvlist(hdl, &zc, nvlist) != 0) {
+ nvlist_free(nvlist);
+ return (-1);
+ }
+ break;
+ case ZFS_SMB_ACL_PURGE:
+ break;
+ default:
+ return (-1);
+ }
+ error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc);
+ if (nvlist)
+ nvlist_free(nvlist);
+ return (error);
+}
+
+int
+zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset,
+ char *path, char *resource)
+{
+ return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD,
+ resource, NULL));
+}
+
+int
+zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset,
+ char *path, char *resource)
+{
+ return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE,
+ resource, NULL));
+}
+
+int
+zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path)
+{
+ return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE,
+ NULL, NULL));
+}
+
+int
+zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path,
+ char *oldname, char *newname)
+{
+ return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME,
+ oldname, newname));
+}
+#endif /* sun */
+
+int
+zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
+ zfs_userspace_cb_t func, void *arg)
+{
+ zfs_cmd_t zc = { 0 };
+ int error;
+ zfs_useracct_t buf[100];
+
+ (void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+ zc.zc_objset_type = type;
+ zc.zc_nvlist_dst = (uintptr_t)buf;
+
+ /* CONSTCOND */
+ while (1) {
+ zfs_useracct_t *zua = buf;
+
+ zc.zc_nvlist_dst_size = sizeof (buf);
+ error = ioctl(zhp->zfs_hdl->libzfs_fd,
+ ZFS_IOC_USERSPACE_MANY, &zc);
+ if (error || zc.zc_nvlist_dst_size == 0)
+ break;
+
+ while (zc.zc_nvlist_dst_size > 0) {
+ error = func(arg, zua->zu_domain, zua->zu_rid,
+ zua->zu_space);
+ if (error != 0)
+ return (error);
+ zua++;
+ zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t);
+ }
+ }
+
+ return (error);
+}
+
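/*
 * A minimal, hypothetical callback for the iterator above.  The exact
 * zfs_userspace_cb_t typedef lives in libzfs.h, but it is invoked as
 * func(arg, domain, rid, space), as in the inner loop:
 *
 *	static int
 *	print_one(void *arg, const char *domain, uid_t rid, uint64_t space)
 *	{
 *		(void) printf("%s/%u\t%ju\n", domain, (unsigned)rid,
 *		    (uintmax_t)space);
 *		return (0);
 *	}
 *
 *	(void) zfs_userspace(zhp, ZFS_PROP_USERUSED, print_one, NULL);
 */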
/*
* Attach/detach the given filesystem to/from the given jail.
*/
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
index e7cbf2386014..bc21c51ae26c 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Iterate over all children of the current object. This includes the normal
* dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
@@ -399,13 +397,6 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
-
- /*
- * Ignore private dataset names.
- */
- if (dataset_name_hidden(zc.zc_name))
- continue;
-
/*
* Get statistics for this dataset, to determine the type of the
* dataset and clone statistics. If this fails, the dataset has
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
index c0e47e905f92..06420332c023 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
@@ -63,6 +63,8 @@ struct libzfs_handle {
int libzfs_printerr;
void *libzfs_sharehdl; /* libshare handle */
uint_t libzfs_shareflags;
+ boolean_t libzfs_mnttab_enable;
+ avl_tree_t libzfs_mnttab_cache;
};
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
@@ -185,7 +187,7 @@ extern int zfs_init_libshare(libzfs_handle_t *, int);
extern void zfs_uninit_libshare(libzfs_handle_t *);
extern int zfs_parse_options(char *, zfs_share_proto_t);
-extern int zfs_unshare_proto(zfs_handle_t *zhp,
+extern int zfs_unshare_proto(zfs_handle_t *,
const char *, zfs_share_proto_t *);
#ifdef __FreeBSD__
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
index ea8523d6e825..56c0968ec2da 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
@@ -74,7 +74,6 @@
#include <unistd.h>
#include <zone.h>
#include <sys/mntent.h>
-#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/stat.h>
@@ -243,18 +242,9 @@ dir_is_empty(const char *dirname)
boolean_t
is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
{
- struct mnttab search = { 0 }, entry;
+ struct mnttab entry;
- /*
- * Search for the entry in /etc/mnttab. We don't bother getting the
- * mountpoint, as we can just search for the special device. This will
- * also let us find mounts when the mountpoint is 'legacy'.
- */
- search.mnt_special = (char *)special;
- search.mnt_fstype = MNTTYPE_ZFS;
-
- rewind(zfs_hdl->libzfs_mnttab);
- if (getmntany(zfs_hdl->libzfs_mnttab, &entry, &search) != 0)
+ if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
return (B_FALSE);
if (where != NULL)
@@ -367,12 +357,14 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
} else {
zfs_error_aux(hdl, strerror(errno));
}
-
return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
zhp->zfs_name));
}
+ /* add the mounted entry into our cache */
+ libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint,
+ mntopts);
return (0);
}
@@ -398,26 +390,23 @@ unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
int
zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
{
- struct mnttab search = { 0 }, entry;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ struct mnttab entry;
char *mntpt = NULL;
- /* check to see if need to unmount the filesystem */
- search.mnt_special = zhp->zfs_name;
- search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zhp->zfs_hdl->libzfs_mnttab);
+ /* check to see if we need to unmount the filesystem */
if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
- getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
-
+ libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
/*
* mountpoint may have come from a call to
* getmnt/getmntany if it isn't NULL. If it is NULL,
- * we know it comes from getmntany which can then get
- * overwritten later. We strdup it to play it safe.
+ * we know it comes from libzfs_mnttab_find which can
+ * then get freed later. We strdup it to play it safe.
*/
if (mountpoint == NULL)
- mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
+ mntpt = zfs_strdup(hdl, entry.mnt_mountp);
else
- mntpt = zfs_strdup(zhp->zfs_hdl, mountpoint);
+ mntpt = zfs_strdup(hdl, mountpoint);
/*
* Unshare and unmount the filesystem
@@ -425,11 +414,12 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0)
return (-1);
- if (unmount_one(zhp->zfs_hdl, mntpt, flags) != 0) {
+ if (unmount_one(hdl, mntpt, flags) != 0) {
free(mntpt);
(void) zfs_shareall(zhp);
return (-1);
}
+ libzfs_mnttab_remove(hdl, zhp->zfs_name);
free(mntpt);
}
@@ -899,18 +889,17 @@ int
zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
zfs_share_proto_t *proto)
{
- struct mnttab search = { 0 }, entry;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ struct mnttab entry;
char *mntpt = NULL;
/* check to see if need to unmount the filesystem */
- search.mnt_special = (char *)zfs_get_name(zhp);
- search.mnt_fstype = MNTTYPE_ZFS;
rewind(zhp->zfs_hdl->libzfs_mnttab);
if (mountpoint != NULL)
- mntpt = zfs_strdup(zhp->zfs_hdl, mountpoint);
+ mountpoint = mntpt = zfs_strdup(hdl, mountpoint);
if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
- getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
+ libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
zfs_share_proto_t *curr_proto;
if (mountpoint == NULL)
@@ -919,8 +908,8 @@ zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
for (curr_proto = proto; *curr_proto != PROTO_END;
curr_proto++) {
- if (is_shared(zhp->zfs_hdl, mntpt, *curr_proto) &&
- unshare_one(zhp->zfs_hdl, zhp->zfs_name,
+ if (is_shared(hdl, mntpt, *curr_proto) &&
+ unshare_one(hdl, zhp->zfs_name,
mntpt, *curr_proto) != 0) {
if (mntpt != NULL)
free(mntpt);
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
index 0369062bbc39..471efe29d872 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -49,6 +49,12 @@
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
+#if defined(__i386) || defined(__amd64)
+#define BOOTCMD "installgrub(1M)"
+#else
+#define BOOTCMD "installboot(1M)"
+#endif
+
/*
* ====================================================================
* zpool property functions
@@ -211,12 +217,39 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
uint_t vsc;
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
- if (prop == ZPOOL_PROP_NAME)
+ switch (prop) {
+ case ZPOOL_PROP_NAME:
(void) strlcpy(buf, zpool_get_name(zhp), len);
- else if (prop == ZPOOL_PROP_HEALTH)
+ break;
+
+ case ZPOOL_PROP_HEALTH:
(void) strlcpy(buf, "FAULTED", len);
- else
+ break;
+
+ case ZPOOL_PROP_GUID:
+ intval = zpool_get_prop_int(zhp, prop, &src);
+ (void) snprintf(buf, len, "%llu", intval);
+ break;
+
+ case ZPOOL_PROP_ALTROOT:
+ case ZPOOL_PROP_CACHEFILE:
+ if (zhp->zpool_props != NULL ||
+ zpool_get_all_props(zhp) == 0) {
+ (void) strlcpy(buf,
+ zpool_get_prop_string(zhp, prop, &src),
+ len);
+ if (srctype != NULL)
+ *srctype = src;
+ return (0);
+ }
+ /* FALLTHROUGH */
+ default:
(void) strlcpy(buf, "-", len);
+ break;
+ }
+
+ if (srctype != NULL)
+ *srctype = src;
return (0);
}
@@ -277,6 +310,17 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
return (0);
}
+static boolean_t
+pool_is_bootable(zpool_handle_t *zhp)
+{
+ char bootfs[ZPOOL_MAXNAMELEN];
+
+ return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
+ sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
+ sizeof (bootfs)) != 0);
+}
+
+
/*
* Check if the bootfs name has the same pool name as it is set to.
* Assuming bootfs is a valid dataset name.
@@ -296,7 +340,6 @@ bootfs_name_valid(const char *pool, char *bootfs)
return (B_FALSE);
}
-#if defined(sun)
/*
* Inspect the configuration to determine if any of the devices contain
* an EFI label.
@@ -304,6 +347,7 @@ bootfs_name_valid(const char *pool, char *bootfs)
static boolean_t
pool_uses_efi(nvlist_t *config)
{
+#ifdef sun
nvlist_t **child;
uint_t c, children;
@@ -315,9 +359,9 @@ pool_uses_efi(nvlist_t *config)
if (pool_uses_efi(child[c]))
return (B_TRUE);
}
+#endif /* sun */
return (B_FALSE);
}
-#endif
/*
* Given an nvlist of zpool properties to be set, validate that they are
@@ -519,9 +563,6 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
zhp->zpool_name);
- if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
- return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
-
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
return (no_memory(zhp->zpool_hdl));
@@ -1012,6 +1053,24 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
return (zfs_error(hdl, EZFS_BADVERSION, msg));
}
+ if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
+ uint64_t s;
+
+ for (s = 0; s < nspares; s++) {
+ char *path;
+
+ if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
+ &path) == 0 && pool_uses_efi(spares[s])) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "device '%s' contains an EFI label and "
+ "cannot be used on root pools."),
+ zpool_vdev_name(hdl, NULL, spares[s]));
+ return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+ }
+ }
+ }
+
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
SPA_VERSION_L2CACHE &&
nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
@@ -1164,7 +1223,9 @@ zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
}
if (nvlist_add_string(props,
- zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
+ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
+ nvlist_add_string(props,
+ zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
nvlist_free(props);
return (zfs_error_fmt(hdl, EZFS_NOMEM,
dgettext(TEXT_DOMAIN, "cannot import '%s'"),
@@ -1453,7 +1514,6 @@ vdev_online(nvlist_t *nv)
int
zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
{
- char bootfs[ZPOOL_MAXNAMELEN];
nvlist_t *vdev_root;
nvlist_t **child;
uint_t count;
@@ -1463,8 +1523,7 @@ zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
* Make sure this is a root pool, as phys_path doesn't mean
* anything to a non-root pool.
*/
- if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
- sizeof (bootfs), NULL) != 0)
+ if (!pool_is_bootable(zhp))
return (-1);
verify(nvlist_lookup_nvlist(zhp->zpool_config,
@@ -1738,6 +1797,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
uint_t children;
nvlist_t *config_root;
libzfs_handle_t *hdl = zhp->zpool_hdl;
+ boolean_t rootpool = pool_is_bootable(zhp);
if (replacing)
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
@@ -1746,6 +1806,16 @@ zpool_vdev_attach(zpool_handle_t *zhp,
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot attach %s to %s"), new_disk, old_disk);
+ /*
+ * If this is a root pool, make sure that we're not attaching an
+ * EFI labeled device.
+ */
+ if (rootpool && pool_uses_efi(nvroot)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "EFI labeled devices are not supported on root pools."));
+ return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+ }
+
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
&islog)) == 0)
@@ -1812,8 +1882,19 @@ zpool_vdev_attach(zpool_handle_t *zhp,
zcmd_free_nvlists(&zc);
- if (ret == 0)
+ if (ret == 0) {
+ if (rootpool) {
+ /*
+ * XXX - This should be removed once we can
+ * automatically install the bootblocks on the
+ * newly attached disk.
+ */
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please "
+ "be sure to invoke %s to make '%s' bootable.\n"),
+ BOOTCMD, new_disk);
+ }
return (0);
+ }
switch (errno) {
case ENOTSUP:
@@ -2824,6 +2905,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
if (zhp) {
nvlist_t *nvroot;
+ if (pool_is_bootable(zhp)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "EFI labeled devices are not supported on root "
+ "pools."));
+ return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
+ }
+
verify(nvlist_lookup_nvlist(zhp->zpool_config,
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
index 3516f6d60bdf..cdde90a89800 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
@@ -240,6 +240,8 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
zfs_prop_t prop = zfs_name_to_prop(propname);
nvlist_t *propnv;
+ assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
+
if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
continue;
@@ -596,12 +598,18 @@ dump_filesystem(zfs_handle_t *zhp, void *arg)
zhp->zfs_name, sdd->fromsnap);
sdd->err = B_TRUE;
} else if (!sdd->seento) {
- (void) fprintf(stderr,
- "WARNING: could not send %s@%s:\n"
- "incremental source (%s@%s) "
- "is not earlier than it\n",
- zhp->zfs_name, sdd->tosnap,
- zhp->zfs_name, sdd->fromsnap);
+ if (sdd->fromsnap) {
+ (void) fprintf(stderr,
+ "WARNING: could not send %s@%s:\n"
+ "incremental source (%s@%s) "
+ "is not earlier than it\n",
+ zhp->zfs_name, sdd->tosnap,
+ zhp->zfs_name, sdd->fromsnap);
+ } else {
+ (void) fprintf(stderr, "WARNING: "
+ "could not send %s@%s: does not exist\n",
+ zhp->zfs_name, sdd->tosnap);
+ }
sdd->err = B_TRUE;
}
} else {
@@ -1100,6 +1108,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
char newname[ZFS_MAXNAMELEN];
int error;
boolean_t needagain, progress;
+ char *s1, *s2;
VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
@@ -1294,12 +1303,13 @@ again:
VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
"parentfromsnap", &stream_parent_fromsnap_guid));
+ s1 = strrchr(fsname, '/');
+ s2 = strrchr(stream_fsname, '/');
+
/* check for rename */
- p1 = strrchr(fsname, '/');
- p2 = strrchr(stream_fsname, '/');
if ((stream_parent_fromsnap_guid != 0 &&
stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
- (p1 != NULL && p2 != NULL && strcmp (p1, p2) != 0)) {
+ ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
nvlist_t *parent;
char tryname[ZFS_MAXNAMELEN];
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
index 859630af02d1..8220b3abc0f6 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -364,6 +364,11 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ENOTSUP:
zfs_verror(hdl, EZFS_BADVERSION, fmt, ap);
break;
+ case EAGAIN:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool I/O is currently suspended"));
+ zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, strerror(errno));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
@@ -437,6 +442,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case EDQUOT:
zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
return (-1);
+ case EAGAIN:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool I/O is currently suspended"));
+ zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, strerror(error));
@@ -480,7 +490,6 @@ zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize)
if ((ret = realloc(ptr, newsize)) == NULL) {
(void) no_memory(hdl);
- free(ptr);
return (NULL);
}
@@ -595,6 +604,7 @@ libzfs_init(void)
zfs_prop_init();
zpool_prop_init();
+ libzfs_mnttab_init(hdl);
return (hdl);
}
@@ -612,6 +622,7 @@ libzfs_fini(libzfs_handle_t *hdl)
(void) free(hdl->libzfs_log_str);
zpool_free_handles(hdl);
namespace_clear(hdl);
+ libzfs_mnttab_fini(hdl);
free(hdl);
}
@@ -802,6 +813,10 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
"SOURCE"));
+ /* first property is always NAME */
+ assert(cbp->cb_proplist->pl_prop ==
+ ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME));
+
/*
* Go through and calculate the widths for each column. For the
* 'source' column, we kludge it up by taking the worst-case scenario of
@@ -829,9 +844,13 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
}
/*
- * 'VALUE' column
+ * 'VALUE' column. The first property is always the 'name'
+ * property that was tacked on either by /sbin/zfs's
+ * zfs_do_get() or when calling zprop_expand_list(), so we
+ * ignore its width. If the user specified the name property
+ * to display, then it will be later in the list in any case.
*/
- if ((pl->pl_prop != ZFS_PROP_NAME || !pl->pl_all) &&
+ if (pl != cbp->cb_proplist &&
pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
@@ -1016,9 +1035,9 @@ zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
return (-1);
}
- /* Rely on stroll() to process the numeric portion. */
+ /* Rely on strtoull() to process the numeric portion. */
errno = 0;
- *num = strtoll(value, &end, 10);
+ *num = strtoull(value, &end, 10);
/*
* Check for ERANGE, which indicates that the value is too large to fit
@@ -1208,7 +1227,7 @@ addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
* dataset property,
*/
if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL ||
- !zfs_prop_user(propname))) {
+ (!zfs_prop_user(propname) && !zfs_prop_userquota(propname)))) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid property '%s'"), propname);
return (zfs_error(hdl, EZFS_BADPROP,
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
index 6623be327e63..ff06fea4387b 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
@@ -329,6 +329,7 @@ typedef void (task_func_t)(void *);
#define TASKQ_PREPOPULATE 0x0001
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
+#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # of threads by % of cpus */
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
@@ -590,6 +591,8 @@ typedef struct ksiddomain {
ksiddomain_t *ksid_lookupdomain(const char *);
void ksiddomain_rele(ksiddomain_t *);
+typedef uint32_t idmap_rid_t;
+
#define SX_SYSINIT(name, lock, desc)
#define SYSCTL_DECL(...)
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
index 93acdcf8e4e3..1a73fe83cc3e 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -174,6 +174,19 @@ taskq_create(const char *name, int nthreads, pri_t pri,
taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
int t;
+ if (flags & TASKQ_THREADS_CPU_PCT) {
+ int pct;
+ ASSERT3S(nthreads, >=, 0);
+ ASSERT3S(nthreads, <=, 100);
+ pct = MIN(nthreads, 100);
+ pct = MAX(pct, 0);
+
+ nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100;
+ nthreads = MAX(nthreads, 1); /* need at least 1 thread */
+ } else {
+ ASSERT3S(nthreads, >=, 1);
+ }
+
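	/*
	 * Worked example (illustrative): with TASKQ_THREADS_CPU_PCT set and
	 * nthreads = 75 on a machine where _SC_NPROCESSORS_ONLN reports 8,
	 * the block above computes (8 * 75) / 100 = 6 threads; asking for
	 * 1% on a uniprocessor still yields MAX(0, 1) == 1 thread.
	 */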
rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py b/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py
new file mode 100644
index 000000000000..f4b0f539542f
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py
@@ -0,0 +1,28 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+"""
+package which provides an administrative interface to ZFS
+"""
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py b/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py
new file mode 100644
index 000000000000..d3a03c731868
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py
@@ -0,0 +1,394 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module implements the "zfs allow" and "zfs unallow" subcommands.
+The only public interface is the zfs.allow.do_allow() function."""
+
+import zfs.util
+import zfs.dataset
+import optparse
+import sys
+import pwd
+import grp
+import errno
+
+_ = zfs.util._
+
+class FSPerms(object):
+ """This class represents all the permissions that are set on a
+ particular filesystem (not including those inherited)."""
+
+ __slots__ = "create", "sets", "local", "descend", "ld"
+ __repr__ = zfs.util.default_repr
+
+ def __init__(self, raw):
+ """Create a FSPerms based on the dict of raw permissions
+ from zfs.ioctl.get_fsacl()."""
+ # set of perms
+ self.create = set()
+
+ # below are { "Ntype name": set(perms) }
+ # where N is a number that we just use for sorting,
+ # type is "user", "group", "everyone", or "" (for sets)
+ # name is a user, group, or set name, or "" (for everyone)
+ self.sets = dict()
+ self.local = dict()
+ self.descend = dict()
+ self.ld = dict()
+
+ # see the comment in dsl_deleg.c for the definition of whokey
+ for whokey in raw.keys():
+ perms = raw[whokey].keys()
+ whotypechr = whokey[0].lower()
+ ws = whokey[3:]
+ if whotypechr == "c":
+ self.create.update(perms)
+ elif whotypechr == "s":
+ nwho = "1" + ws
+ self.sets.setdefault(nwho, set()).update(perms)
+ else:
+ if whotypechr == "u":
+ try:
+ name = pwd.getpwuid(int(ws)).pw_name
+ except KeyError:
+ name = ws
+ nwho = "1user " + name
+ elif whotypechr == "g":
+ try:
+ name = grp.getgrgid(int(ws)).gr_name
+ except KeyError:
+ name = ws
+ nwho = "2group " + name
+ elif whotypechr == "e":
+ nwho = "3everyone"
+ else:
+ raise ValueError(whotypechr)
+
+ if whokey[1] == "l":
+ d = self.local
+ elif whokey[1] == "d":
+ d = self.descend
+ else:
+ raise ValueError(whokey[1])
+
+ d.setdefault(nwho, set()).update(perms)
+
+ # Find perms that are in both local and descend, and
+ # move them to ld.
+ for nwho in self.local:
+ if nwho not in self.descend:
+ continue
+ # note: these are set operations
+ self.ld[nwho] = self.local[nwho] & self.descend[nwho]
+ self.local[nwho] -= self.ld[nwho]
+ self.descend[nwho] -= self.ld[nwho]
+
+ @staticmethod
+ def __ldstr(d, header):
+ s = ""
+ for (nwho, perms) in sorted(d.items()):
+ # local and descend may have entries where perms
+ # is an empty set, due to consolidating all
+ # permissions into ld
+ if perms:
+ s += "\t%s %s\n" % \
+ (nwho[1:], ",".join(sorted(perms)))
+ if s:
+ s = header + s
+ return s
+
+ def __str__(self):
+ s = self.__ldstr(self.sets, _("Permission sets:\n"))
+
+ if self.create:
+ s += _("Create time permissions:\n")
+ s += "\t%s\n" % ",".join(sorted(self.create))
+
+ s += self.__ldstr(self.local, _("Local permissions:\n"))
+ s += self.__ldstr(self.descend, _("Descendent permissions:\n"))
+ s += self.__ldstr(self.ld, _("Local+Descendent permissions:\n"))
+ return s.rstrip()
+
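FSPerms.__init__() relies on the whokey layout defined in dsl_deleg.c: the first character encodes who the entry applies to ('u' user, 'g' group, 'e' everyone, 'c' create-time, 's' permission set), the second encodes locality ('l' local, 'd' descendent, '-' not applicable), the third is a literal '$', and the rest is the uid/gid, set name, or empty string. A small decoding sketch; the example keys are purely illustrative:

    def split_whokey(whokey):
        # Illustrative decoder for the 'Xy$name' keys parsed by FSPerms.__init__().
        types = {"u": "user", "g": "group", "e": "everyone",
                 "c": "create", "s": "set"}
        localities = {"l": "local", "d": "descendent", "-": "none"}
        return (types[whokey[0].lower()], localities[whokey[1]], whokey[3:])

    # split_whokey("ul$1001")    -> ("user", "local", "1001")
    # split_whokey("s-$@backup") -> ("set", "none", "@backup")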
+def args_to_perms(parser, options, who, perms):
+ """Return a dict of raw perms {"whostr" -> {"perm" -> None}}
+ based on the command-line input."""
+
+ # perms is not set if we are doing a "zfs unallow <who> <fs>" to
+ # remove all of someone's permissions
+ if perms:
+ setperms = dict(((p, None) for p in perms if p[0] == "@"))
+ baseperms = dict(((canonicalized_perm(p), None)
+ for p in perms if p[0] != "@"))
+ else:
+ setperms = None
+ baseperms = None
+
+ d = dict()
+
+ def storeperm(typechr, inheritchr, arg):
+ assert typechr in "ugecs"
+ assert inheritchr in "ld-"
+
+ def mkwhokey(t):
+ return "%c%c$%s" % (t, inheritchr, arg)
+
+ if baseperms or not perms:
+ d[mkwhokey(typechr)] = baseperms
+ if setperms or not perms:
+ d[mkwhokey(typechr.upper())] = setperms
+
+ def decodeid(w, toidfunc, fmt):
+ try:
+ return int(w)
+ except ValueError:
+ try:
+ return toidfunc(w)[2]
+ except KeyError:
+ parser.error(fmt % w)
+
+ if options.set:
+ storeperm("s", "-", who)
+ elif options.create:
+ storeperm("c", "-", "")
+ else:
+ for w in who:
+ if options.user:
+ id = decodeid(w, pwd.getpwnam,
+ _("invalid user %s"))
+ typechr = "u"
+ elif options.group:
+ id = decodeid(w, grp.getgrnam,
+ _("invalid group %s"))
+ typechr = "g"
+ elif w == "everyone":
+ id = ""
+ typechr = "e"
+ else:
+ try:
+ id = pwd.getpwnam(w)[2]
+ typechr = "u"
+ except KeyError:
+ try:
+ id = grp.getgrnam(w)[2]
+ typechr = "g"
+ except KeyError:
+ parser.error(_("invalid user/group %s") % w)
+ if options.local:
+ storeperm(typechr, "l", id)
+ if options.descend:
+ storeperm(typechr, "d", id)
+ return d
+
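For reference, this is the shape of the dict that args_to_perms() returns and set_fsacl() consumes; the user name and uid below are hypothetical:

    # "zfs allow -ld joe mount,snapshot tank", assuming joe's uid is 1001,
    # translates into roughly this structure:
    perms = {
        "ul$1001": {"mount": None, "snapshot": None},   # local permissions
        "ud$1001": {"mount": None, "snapshot": None},   # descendent permissions
    }
    # ds.set_fsacl(False, perms) adds the entries, ds.set_fsacl(True, perms)
    # removes them, and a value of None instead of a dict removes every
    # permission held by that entity.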
+perms_subcmd = dict(
+ create=_("Must also have the 'mount' ability"),
+ destroy=_("Must also have the 'mount' ability"),
+ snapshot=_("Must also have the 'mount' ability"),
+ rollback=_("Must also have the 'mount' ability"),
+ clone=_("""Must also have the 'create' ability and 'mount'
+\t\t\t\tability in the origin file system"""),
+ promote=_("""Must also have the 'mount'
+\t\t\t\tand 'promote' ability in the origin file system"""),
+ rename=_("""Must also have the 'mount' and 'create'
+\t\t\t\tability in the new parent"""),
+ receive=_("Must also have the 'mount' and 'create' ability"),
+ allow=_("Must also have the permission that is being\n\t\t\t\tallowed"),
+ mount=_("Allows mount/umount of ZFS datasets"),
+ share=_("Allows sharing file systems over NFS or SMB\n\t\t\t\tprotocols"),
+ send="",
+)
+
+perms_other = dict(
+ userprop=_("Allows changing any user property"),
+ userquota=_("Allows accessing any userquota@... property"),
+ groupquota=_("Allows accessing any groupquota@... property"),
+ userused=_("Allows reading any userused@... property"),
+ groupused=_("Allows reading any groupused@... property"),
+)
+
+def hasset(ds, setname):
+ """Return True if the given setname (string) is defined for this
+ ds (Dataset)."""
+ # It would be nice to cache the result of get_fsacl().
+ for raw in ds.get_fsacl().values():
+ for whokey in raw.keys():
+ if whokey[0].lower() == "s" and whokey[3:] == setname:
+ return True
+ return False
+
+def canonicalized_perm(permname):
+ """Return the canonical name (string) for this permission (string).
+ Raises ZFSError if it is not a valid permission."""
+ if permname in perms_subcmd.keys() or permname in perms_other.keys():
+ return permname
+ try:
+ return zfs.dataset.getpropobj(permname).name
+ except KeyError:
+ raise zfs.util.ZFSError(errno.EINVAL, permname,
+ _("invalid permission"))
+
+def print_perms():
+ """Print the set of supported permissions."""
+ print(_("\nThe following permissions are supported:\n"))
+ fmt = "%-16s %-14s\t%s"
+ print(fmt % (_("NAME"), _("TYPE"), _("NOTES")))
+
+ for (name, note) in sorted(perms_subcmd.iteritems()):
+ print(fmt % (name, _("subcommand"), note))
+
+ for (name, note) in sorted(perms_other.iteritems()):
+ print(fmt % (name, _("other"), note))
+
+ for (name, prop) in sorted(zfs.dataset.proptable.iteritems()):
+ if prop.visible and prop.delegatable():
+ print(fmt % (name, _("property"), ""))
+
+def do_allow():
+ """Implementes the "zfs allow" and "zfs unallow" subcommands."""
+ un = (sys.argv[1] == "unallow")
+
+ def usage(msg=None):
+ parser.print_help()
+ print_perms()
+ if msg:
+ print
+ parser.exit("zfs: error: " + msg)
+ else:
+ parser.exit()
+
+ if un:
+ u = _("""unallow [-rldug] <"everyone"|user|group>[,...]
+ [<perm|@setname>[,...]] <filesystem|volume>
+ unallow [-rld] -e [<perm|@setname>[,...]] <filesystem|volume>
+ unallow [-r] -c [<perm|@setname>[,...]] <filesystem|volume>
+ unallow [-r] -s @setname [<perm|@setname>[,...]] <filesystem|volume>""")
+ verb = _("remove")
+ sstr = _("undefine permission set")
+ else:
+ u = _("""allow <filesystem|volume>
+ allow [-ldug] <"everyone"|user|group>[,...] <perm|@setname>[,...]
+ <filesystem|volume>
+ allow [-ld] -e <perm|@setname>[,...] <filesystem|volume>
+ allow -c <perm|@setname>[,...] <filesystem|volume>
+ allow -s @setname <perm|@setname>[,...] <filesystem|volume>""")
+ verb = _("set")
+ sstr = _("define permission set")
+
+ parser = optparse.OptionParser(usage=u, prog="zfs")
+
+ parser.add_option("-l", action="store_true", dest="local",
+ help=_("%s permission locally") % verb)
+ parser.add_option("-d", action="store_true", dest="descend",
+ help=_("%s permission for descendents") % verb)
+ parser.add_option("-u", action="store_true", dest="user",
+ help=_("%s permission for user") % verb)
+ parser.add_option("-g", action="store_true", dest="group",
+ help=_("%s permission for group") % verb)
+ parser.add_option("-e", action="store_true", dest="everyone",
+ help=_("%s permission for everyone") % verb)
+ parser.add_option("-c", action="store_true", dest="create",
+ help=_("%s create time permissions") % verb)
+ parser.add_option("-s", action="store_true", dest="set", help=sstr)
+ if un:
+ parser.add_option("-r", action="store_true", dest="recursive",
+ help=_("remove permissions recursively"))
+
+ if len(sys.argv) == 3 and not un:
+ # just print the permissions on this fs
+
+ if sys.argv[2] == "-h":
+ # hack to make "zfs allow -h" work
+ usage()
+ ds = zfs.dataset.Dataset(sys.argv[2])
+
+ p = dict()
+ for (fs, raw) in ds.get_fsacl().items():
+ p[fs] = FSPerms(raw)
+
+ for fs in sorted(p.keys(), reverse=True):
+ s = _("---- Permissions on %s ") % fs
+ print(s + "-" * (70-len(s)))
+ print(p[fs])
+ return
+
+
+ (options, args) = parser.parse_args(sys.argv[2:])
+
+ if sum((bool(options.everyone), bool(options.user),
+ bool(options.group))) > 1:
+ parser.error(_("-u, -g, and -e are mutually exclusive"))
+
+ def mungeargs(expected_len):
+ if un and len(args) == expected_len-1:
+ return (None, args[expected_len-2])
+ elif len(args) == expected_len:
+ return (args[expected_len-2].split(","),
+ args[expected_len-1])
+ else:
+ usage(_("wrong number of parameters"))
+
+ if options.set:
+ if options.local or options.descend or options.user or \
+ options.group or options.everyone or options.create:
+ parser.error(_("invalid option combined with -s"))
+ if args[0][0] != "@":
+ parser.error(_("invalid set name: missing '@' prefix"))
+
+ (perms, fsname) = mungeargs(3)
+ who = args[0]
+ elif options.create:
+ if options.local or options.descend or options.user or \
+ options.group or options.everyone or options.set:
+ parser.error(_("invalid option combined with -c"))
+
+ (perms, fsname) = mungeargs(2)
+ who = None
+ elif options.everyone:
+ if options.user or options.group or \
+ options.create or options.set:
+ parser.error(_("invalid option combined with -e"))
+
+ (perms, fsname) = mungeargs(2)
+ who = ["everyone"]
+ else:
+ (perms, fsname) = mungeargs(3)
+ who = args[0].split(",")
+
+ if not options.local and not options.descend:
+ options.local = True
+ options.descend = True
+
+ d = args_to_perms(parser, options, who, perms)
+
+ ds = zfs.dataset.Dataset(fsname, snaps=False)
+
+ if not un and perms:
+ for p in perms:
+ if p[0] == "@" and not hasset(ds, p):
+ parser.error(_("set %s is not defined") % p)
+
+ ds.set_fsacl(un, d)
+ if un and options.recursive:
+ for child in ds.descendents():
+ child.set_fsacl(un, d)
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py b/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py
new file mode 100644
index 000000000000..b45173e01f2e
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py
@@ -0,0 +1,205 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+"""Implements the Dataset class, providing methods for manipulating ZFS
+datasets. Also implements the Property class, which describes ZFS
+properties."""
+
+import zfs.ioctl
+import zfs.util
+import errno
+
+_ = zfs.util._
+
+class Property(object):
+ """This class represents a ZFS property. It contains
+ information about the property -- if it's readonly, a number vs
+ string vs index, etc. Only native properties are represented by
+ this class -- not user properties (eg "user:prop") or userspace
+ properties (eg "userquota@joe")."""
+
+ __slots__ = "name", "number", "type", "default", "attr", "validtypes", \
+ "values", "colname", "rightalign", "visible", "indextable"
+ __repr__ = zfs.util.default_repr
+
+ def __init__(self, t):
+ """t is the tuple of information about this property
+ from zfs.ioctl.get_proptable, which should match the
+ members of zprop_desc_t (see zfs_prop.h)."""
+
+ self.name = t[0]
+ self.number = t[1]
+ self.type = t[2]
+ if self.type == "string":
+ self.default = t[3]
+ else:
+ self.default = t[4]
+ self.attr = t[5]
+ self.validtypes = t[6]
+ self.values = t[7]
+ self.colname = t[8]
+ self.rightalign = t[9]
+ self.visible = t[10]
+ self.indextable = t[11]
+
+ def delegatable(self):
+ """Return True if this property can be delegated with
+ "zfs allow"."""
+ return self.attr != "readonly"
+
+proptable = dict()
+for name, t in zfs.ioctl.get_proptable().iteritems():
+ proptable[name] = Property(t)
+del name, t
+
+def getpropobj(name):
+ """Return the Property object that is identified by the given
+ name string. It can be the full name, or the column name."""
+ try:
+ return proptable[name]
+ except KeyError:
+ for p in proptable.itervalues():
+ if p.colname and p.colname.lower() == name:
+ return p
+ raise
+
+class Dataset(object):
+ """Represents a ZFS dataset (filesystem, snapshot, zvol, clone, etc).
+
+ Generally, this class provides interfaces to the C functions in
+ zfs.ioctl which actually interface with the kernel to manipulate
+ datasets.
+
+ Unless otherwise noted, any method can raise a ZFSError to
+ indicate failure."""
+
+ __slots__ = "name", "__props"
+ __repr__ = zfs.util.default_repr
+
+ def __init__(self, name, props=None,
+ types=("filesystem", "volume"), snaps=True):
+ """Open the named dataset, checking that it exists and
+ is of the specified type.
+
+ name is the string name of this dataset.
+
+ props is the property settings dict from zfs.ioctl.next_dataset.
+
+ types is an iterable of strings specifying which types
+ of datasets are permitted. Accepted strings are
+ "filesystem" and "volume". Defaults to acceptying all
+ types.
+
+ snaps is a boolean specifying if snapshots are acceptable.
+
+ Raises a ZFSError if the dataset can't be accessed (eg
+ doesn't exist) or is not of the specified type.
+ """
+
+ self.name = name
+
+ e = zfs.util.ZFSError(errno.EINVAL,
+ _("cannot open %s") % name,
+ _("operation not applicable to datasets of this type"))
+ if "@" in name and not snaps:
+ raise e
+ if not props:
+ props = zfs.ioctl.dataset_props(name)
+ self.__props = props
+ if "volume" not in types and self.getprop("type") == 3:
+ raise e
+ if "filesystem" not in types and self.getprop("type") == 2:
+ raise e
+
+ def getprop(self, propname):
+ """Return the value of the given property for this dataset.
+
+ Currently only works for native properties (those with a
+ Property object.)
+
+ Raises KeyError if propname does not specify a native property.
+ Does not raise ZFSError.
+ """
+
+ p = getpropobj(propname)
+ try:
+ return self.__props[p.name]["value"]
+ except KeyError:
+ return p.default
+
+ def parent(self):
+ """Return a Dataset representing the parent of this one."""
+ return Dataset(self.name[:self.name.rindex("/")])
+
+ def descendents(self):
+ """A generator function which iterates over all
+ descendent Datasets (not including snapshots)."""
+
+ cookie = 0
+ while True:
+ # next_dataset raises StopIteration when done
+ (name, cookie, props) = \
+ zfs.ioctl.next_dataset(self.name, False, cookie)
+ ds = Dataset(name, props)
+ yield ds
+ for child in ds.descendents():
+ yield child
+
+ def userspace(self, prop):
+ """A generator function which iterates over a
+ userspace-type property.
+
+ prop specifies which property ("userused@",
+ "userquota@", "groupused@", or "groupquota@").
+
+ returns 3-tuple of domain (string), rid (int), and space (int).
+ """
+
+ d = zfs.ioctl.userspace_many(self.name, prop)
+ for ((domain, rid), space) in d.iteritems():
+ yield (domain, rid, space)
+
+ def userspace_upgrade(self):
+ """Initialize the accounting information for
+ userused@... and groupused@... properties."""
+ return zfs.ioctl.userspace_upgrade(self.name)
+
+ def set_fsacl(self, un, d):
+ """Add to the "zfs allow"-ed permissions on this Dataset.
+
+ un is True if the specified permissions should be removed.
+
+ d is a dict specifying which permissions to add/remove:
+ { "whostr" -> None # remove all perms for this entity
+ "whostr" -> { "perm" -> None} # add/remove these perms
+ } """
+ return zfs.ioctl.set_fsacl(self.name, un, d)
+
+ def get_fsacl(self):
+ """Get the "zfs allow"-ed permissions on the Dataset.
+
+ Return a dict("whostr": { "perm" -> None })."""
+
+ return zfs.ioctl.get_fsacl(self.name)
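A short usage sketch of the Dataset class above; the pool name "tank" is assumed, and the snippet needs the zfs package introduced by this change to be importable:

    import zfs.dataset

    ds = zfs.dataset.Dataset("tank")
    print(ds.getprop("mountpoint"))
    # Walk every descendent filesystem/volume and print its space usage.
    for child in ds.descendents():
        print("%s\t%s" % (child.name, child.getprop("used")))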
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py b/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py
new file mode 100644
index 000000000000..7db4bf3e0c20
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py
@@ -0,0 +1,29 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+import zfs.userspace
+
+do_groupspace = zfs.userspace.do_userspace
+
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c b/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c
new file mode 100644
index 000000000000..c0de5c474c0e
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c
@@ -0,0 +1,610 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <Python.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/fs/zfs.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libnvpair.h>
+#include <idmap.h>
+#include <zone.h>
+#include <libintl.h>
+#include <libzfs.h>
+#include "zfs_prop.h"
+
+static PyObject *ZFSError;
+static int zfsdevfd;
+
+#ifdef __lint
+#define dgettext(x, y) y
+#endif
+
+#define _(s) dgettext(TEXT_DOMAIN, s)
+
+#ifdef sun
+extern int sid_to_id(char *sid, boolean_t user, uid_t *id);
+#endif /* sun */
+
+/*PRINTFLIKE1*/
+static void
+seterr(char *fmt, ...)
+{
+ char errstr[1024];
+ va_list v;
+
+ va_start(v, fmt);
+ (void) vsnprintf(errstr, sizeof (errstr), fmt, v);
+ va_end(v);
+
+ PyErr_SetObject(ZFSError, Py_BuildValue("is", errno, errstr));
+}
+
+static char cmdstr[HIS_MAX_RECORD_LEN];
+
+static int
+ioctl_with_cmdstr(unsigned long ioc, zfs_cmd_t *zc)
+{
+ int err;
+
+ if (cmdstr[0])
+ zc->zc_history = (uint64_t)(uintptr_t)cmdstr;
+ err = ioctl(zfsdevfd, ioc, zc);
+ cmdstr[0] = '\0';
+ return (err);
+}
+
+static PyObject *
+nvl2py(nvlist_t *nvl)
+{
+ PyObject *pyo;
+ nvpair_t *nvp;
+
+ pyo = PyDict_New();
+
+ for (nvp = nvlist_next_nvpair(nvl, NULL); nvp;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ PyObject *pyval;
+ char *sval;
+ uint64_t ival;
+ boolean_t bval;
+ nvlist_t *nval;
+
+ switch (nvpair_type(nvp)) {
+ case DATA_TYPE_STRING:
+ (void) nvpair_value_string(nvp, &sval);
+ pyval = Py_BuildValue("s", sval);
+ break;
+
+ case DATA_TYPE_UINT64:
+ (void) nvpair_value_uint64(nvp, &ival);
+ pyval = Py_BuildValue("K", ival);
+ break;
+
+ case DATA_TYPE_NVLIST:
+ (void) nvpair_value_nvlist(nvp, &nval);
+ pyval = nvl2py(nval);
+ break;
+
+ case DATA_TYPE_BOOLEAN:
+ Py_INCREF(Py_None);
+ pyval = Py_None;
+ break;
+
+ case DATA_TYPE_BOOLEAN_VALUE:
+ (void) nvpair_value_boolean_value(nvp, &bval);
+ pyval = Py_BuildValue("i", bval);
+ break;
+
+ default:
+ PyErr_SetNone(PyExc_ValueError);
+ Py_DECREF(pyo);
+ return (NULL);
+ }
+
+ PyDict_SetItemString(pyo, nvpair_name(nvp), pyval);
+ Py_DECREF(pyval);
+ }
+
+ return (pyo);
+}
+
+static nvlist_t *
+dict2nvl(PyObject *d)
+{
+ nvlist_t *nvl;
+ int err;
+ PyObject *key, *value;
+// int pos = 0;
+ Py_ssize_t pos = 0;
+
+ if (!PyDict_Check(d)) {
+ PyErr_SetObject(PyExc_ValueError, d);
+ return (NULL);
+ }
+
+ err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
+ assert(err == 0);
+
+ while (PyDict_Next(d, &pos, &key, &value)) {
+ char *keystr = PyString_AsString(key);
+ if (keystr == NULL) {
+ PyErr_SetObject(PyExc_KeyError, key);
+ nvlist_free(nvl);
+ return (NULL);
+ }
+
+ if (PyDict_Check(value)) {
+ nvlist_t *valnvl = dict2nvl(value);
+ err = nvlist_add_nvlist(nvl, keystr, valnvl);
+ nvlist_free(valnvl);
+ } else if (value == Py_None) {
+ err = nvlist_add_boolean(nvl, keystr);
+ } else if (PyString_Check(value)) {
+ char *valstr = PyString_AsString(value);
+ err = nvlist_add_string(nvl, keystr, valstr);
+ } else if (PyInt_Check(value)) {
+ uint64_t valint = PyInt_AsUnsignedLongLongMask(value);
+ err = nvlist_add_uint64(nvl, keystr, valint);
+ } else if (PyBool_Check(value)) {
+ boolean_t valbool = value == Py_True ? B_TRUE : B_FALSE;
+ err = nvlist_add_boolean_value(nvl, keystr, valbool);
+ } else {
+ PyErr_SetObject(PyExc_ValueError, value);
+ nvlist_free(nvl);
+ return (NULL);
+ }
+ assert(err == 0);
+ }
+
+ return (nvl);
+}
+
+static PyObject *
+fakepropval(uint64_t value)
+{
+ PyObject *d = PyDict_New();
+ PyDict_SetItemString(d, "value", Py_BuildValue("K", value));
+ return (d);
+}
+
+static void
+add_ds_props(zfs_cmd_t *zc, PyObject *nvl)
+{
+ dmu_objset_stats_t *s = &zc->zc_objset_stats;
+ PyDict_SetItemString(nvl, "numclones",
+ fakepropval(s->dds_num_clones));
+ PyDict_SetItemString(nvl, "issnap",
+ fakepropval(s->dds_is_snapshot));
+ PyDict_SetItemString(nvl, "inconsistent",
+ fakepropval(s->dds_inconsistent));
+}
+
+/* On error, returns NULL but does not set python exception. */
+static PyObject *
+ioctl_with_dstnv(unsigned long ioc, zfs_cmd_t *zc)
+{
+ int nvsz = 2048;
+ void *nvbuf;
+ PyObject *pynv = NULL;
+
+again:
+ nvbuf = malloc(nvsz);
+ zc->zc_nvlist_dst_size = nvsz;
+ zc->zc_nvlist_dst = (uintptr_t)nvbuf;
+
+ if (ioctl(zfsdevfd, ioc, zc) == 0) {
+ nvlist_t *nvl;
+
+ errno = nvlist_unpack(nvbuf, zc->zc_nvlist_dst_size, &nvl, 0);
+ if (errno == 0) {
+ pynv = nvl2py(nvl);
+ nvlist_free(nvl);
+ }
+ } else if (errno == ENOMEM) {
+ free(nvbuf);
+ nvsz = zc->zc_nvlist_dst_size;
+ goto again;
+ }
+ free(nvbuf);
+ return (pynv);
+}
+
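ioctl_with_dstnv() uses the usual grow-and-retry protocol for nvlist-returning ioctls: start with a small destination buffer, and when the kernel fails the request with ENOMEM it reports the size it actually needs in zc_nvlist_dst_size, so the call is simply repeated with that size. A small Python sketch of the pattern, where do_ioctl is a hypothetical callable returning (error, needed_size, data):

    import errno

    def fetch_with_retry(do_ioctl, initial_size=2048):
        # Sketch of the retry loop in ioctl_with_dstnv() (illustrative only).
        size = initial_size
        while True:
            err, needed, data = do_ioctl(size)
            if err == 0:
                return data
            if err != errno.ENOMEM:
                raise OSError(err, "ioctl failed")
            size = needed    # retry with the size the kernel reported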
+static PyObject *
+py_next_dataset(PyObject *self, PyObject *args)
+{
+ unsigned long ioc;
+ uint64_t cookie;
+ zfs_cmd_t zc = { 0 };
+ int snaps;
+ char *name;
+ PyObject *nvl;
+ PyObject *ret = NULL;
+
+ if (!PyArg_ParseTuple(args, "siK", &name, &snaps, &cookie))
+ return (NULL);
+
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+ zc.zc_cookie = cookie;
+
+ if (snaps)
+ ioc = ZFS_IOC_SNAPSHOT_LIST_NEXT;
+ else
+ ioc = ZFS_IOC_DATASET_LIST_NEXT;
+
+ nvl = ioctl_with_dstnv(ioc, &zc);
+ if (nvl) {
+ add_ds_props(&zc, nvl);
+ ret = Py_BuildValue("sKO", zc.zc_name, zc.zc_cookie, nvl);
+ Py_DECREF(nvl);
+ } else if (errno == ESRCH) {
+ PyErr_SetNone(PyExc_StopIteration);
+ } else {
+ if (snaps)
+ seterr(_("cannot get snapshots of %s"), name);
+ else
+ seterr(_("cannot get child datasets of %s"), name);
+ }
+ return (ret);
+}
+
+static PyObject *
+py_dataset_props(PyObject *self, PyObject *args)
+{
+ zfs_cmd_t zc = { 0 };
+ int snaps;
+ char *name;
+ PyObject *nvl;
+
+ if (!PyArg_ParseTuple(args, "s", &name))
+ return (NULL);
+
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+ nvl = ioctl_with_dstnv(ZFS_IOC_OBJSET_STATS, &zc);
+ if (nvl) {
+ add_ds_props(&zc, nvl);
+ } else {
+ seterr(_("cannot access dataset %s"), name);
+ }
+ return (nvl);
+}
+
+static PyObject *
+py_get_fsacl(PyObject *self, PyObject *args)
+{
+ zfs_cmd_t zc = { 0 };
+ char *name;
+ PyObject *nvl;
+
+ if (!PyArg_ParseTuple(args, "s", &name))
+ return (NULL);
+
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+ nvl = ioctl_with_dstnv(ZFS_IOC_GET_FSACL, &zc);
+ if (nvl == NULL)
+ seterr(_("cannot get permissions on %s"), name);
+
+ return (nvl);
+}
+
+static PyObject *
+py_set_fsacl(PyObject *self, PyObject *args)
+{
+ int un;
+ size_t nvsz;
+ zfs_cmd_t zc = { 0 };
+ char *name, *nvbuf;
+ PyObject *dict, *file;
+ nvlist_t *nvl;
+ int err;
+
+ if (!PyArg_ParseTuple(args, "siO!", &name, &un,
+ &PyDict_Type, &dict))
+ return (NULL);
+
+ nvl = dict2nvl(dict);
+ if (nvl == NULL)
+ return (NULL);
+
+ err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE);
+ assert(err == 0);
+ nvbuf = malloc(nvsz);
+ err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0);
+ assert(err == 0);
+
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+ zc.zc_nvlist_src_size = nvsz;
+ zc.zc_nvlist_src = (uintptr_t)nvbuf;
+ zc.zc_perm_action = un;
+
+ err = ioctl_with_cmdstr(ZFS_IOC_SET_FSACL, &zc);
+ free(nvbuf);
+ if (err) {
+ seterr(_("cannot set permissions on %s"), name);
+ return (NULL);
+ }
+
+ Py_RETURN_NONE;
+}
+
+static PyObject *
+py_userspace_many(PyObject *self, PyObject *args)
+{
+ zfs_cmd_t zc = { 0 };
+ zfs_userquota_prop_t type;
+ char *name, *propname;
+ int bufsz = 1<<20;
+ void *buf;
+ PyObject *dict, *file;
+ int error;
+
+ if (!PyArg_ParseTuple(args, "ss", &name, &propname))
+ return (NULL);
+
+ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++)
+ if (strcmp(propname, zfs_userquota_prop_prefixes[type]) == 0)
+ break;
+ if (type == ZFS_NUM_USERQUOTA_PROPS) {
+ PyErr_SetString(PyExc_KeyError, propname);
+ return (NULL);
+ }
+
+ dict = PyDict_New();
+ buf = malloc(bufsz);
+
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+ zc.zc_objset_type = type;
+ zc.zc_cookie = 0;
+
+ while (1) {
+ zfs_useracct_t *zua = buf;
+
+ zc.zc_nvlist_dst = (uintptr_t)buf;
+ zc.zc_nvlist_dst_size = bufsz;
+
+ error = ioctl(zfsdevfd, ZFS_IOC_USERSPACE_MANY, &zc);
+ if (error || zc.zc_nvlist_dst_size == 0)
+ break;
+
+ while (zc.zc_nvlist_dst_size > 0) {
+ PyObject *pykey, *pyval;
+
+ pykey = Py_BuildValue("sI",
+ zua->zu_domain, zua->zu_rid);
+ pyval = Py_BuildValue("K", zua->zu_space);
+ PyDict_SetItem(dict, pykey, pyval);
+ Py_DECREF(pykey);
+ Py_DECREF(pyval);
+
+ zua++;
+ zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t);
+ }
+ }
+
+ free(buf);
+
+ if (error != 0) {
+ Py_DECREF(dict);
+ seterr(_("cannot get %s property on %s"), propname, name);
+ return (NULL);
+ }
+
+ return (dict);
+}
+
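py_userspace_many() walks the returned buffer as an array of zfs_useracct_t records and builds a dict keyed by (domain, rid) holding the space charged to each entity. From Python it is normally reached through Dataset.userspace(); a usage sketch, with the dataset name "tank/home" assumed:

    import zfs.dataset
    import zfs.util

    ds = zfs.dataset.Dataset("tank/home")
    for (domain, rid, space) in ds.userspace("userused@"):
        if domain:
            who = "%s-%u" % (domain, rid)    # SID-style identity
        else:
            who = "%u" % rid                 # plain POSIX uid
        print("%s\t%s" % (who, zfs.util.nicenum(space)))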
+static PyObject *
+py_userspace_upgrade(PyObject *self, PyObject *args)
+{
+ zfs_cmd_t zc = { 0 };
+ char *name;
+ int error;
+
+ if (!PyArg_ParseTuple(args, "s", &name))
+ return (NULL);
+
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+ error = ioctl(zfsdevfd, ZFS_IOC_USERSPACE_UPGRADE, &zc);
+
+ if (error != 0) {
+ seterr(_("cannot initialize user accounting information on %s"),
+ name);
+ return (NULL);
+ }
+
+ Py_RETURN_NONE;
+}
+
+static PyObject *
+py_sid_to_id(PyObject *self, PyObject *args)
+{
+#ifdef sun
+ char *sid;
+ int err, isuser;
+ uid_t id;
+
+ if (!PyArg_ParseTuple(args, "si", &sid, &isuser))
+ return (NULL);
+
+ err = sid_to_id(sid, isuser, &id);
+ if (err) {
+ PyErr_SetString(PyExc_KeyError, sid);
+ return (NULL);
+ }
+
+ return (Py_BuildValue("I", id));
+#else /* sun */
+ return (NULL);
+#endif /* sun */
+}
+
+/*
+ * Translate the sid string ("S-1-...") to the user@domain name, if
+ * possible. There should be a better way to do this, but for now we
+ * just translate to the (possibly ephemeral) uid and then back again.
+ */
+static PyObject *
+py_sid_to_name(PyObject *self, PyObject *args)
+{
+#ifdef sun
+ char *sid;
+ int err, isuser;
+ uid_t id;
+ char *name, *domain;
+ char buf[256];
+
+ if (!PyArg_ParseTuple(args, "si", &sid, &isuser))
+ return (NULL);
+
+ err = sid_to_id(sid, isuser, &id);
+ if (err) {
+ PyErr_SetString(PyExc_KeyError, sid);
+ return (NULL);
+ }
+
+ if (isuser) {
+ err = idmap_getwinnamebyuid(id,
+ IDMAP_REQ_FLG_USE_CACHE, &name, &domain);
+ } else {
+ err = idmap_getwinnamebygid(id,
+ IDMAP_REQ_FLG_USE_CACHE, &name, &domain);
+ }
+ if (err != IDMAP_SUCCESS) {
+ PyErr_SetString(PyExc_KeyError, sid);
+ return (NULL);
+ }
+ (void) snprintf(buf, sizeof (buf), "%s@%s", name, domain);
+ free(name);
+ free(domain);
+
+ return (Py_BuildValue("s", buf));
+#else /* sun */
+ return(NULL);
+#endif /* sun */
+}
+
+static PyObject *
+py_isglobalzone(PyObject *self, PyObject *args)
+{
+ return (Py_BuildValue("i", getzoneid() == GLOBAL_ZONEID));
+}
+
+static PyObject *
+py_set_cmdstr(PyObject *self, PyObject *args)
+{
+ char *str;
+
+ if (!PyArg_ParseTuple(args, "s", &str))
+ return (NULL);
+
+ (void) strlcpy(cmdstr, str, sizeof (cmdstr));
+
+ Py_RETURN_NONE;
+}
+
+static PyObject *
+py_get_proptable(PyObject *self, PyObject *args)
+{
+ zprop_desc_t *t = zfs_prop_get_table();
+ PyObject *d = PyDict_New();
+ zfs_prop_t i;
+
+ for (i = 0; i < ZFS_NUM_PROPS; i++) {
+ zprop_desc_t *p = &t[i];
+ PyObject *tuple;
+ static const char *typetable[] =
+ {"number", "string", "index"};
+ static const char *attrtable[] =
+ {"default", "readonly", "inherit", "onetime"};
+ PyObject *indextable;
+
+ if (p->pd_proptype == PROP_TYPE_INDEX) {
+ const zprop_index_t *it = p->pd_table;
+ indextable = PyDict_New();
+ int j;
+ for (j = 0; it[j].pi_name; j++) {
+ PyDict_SetItemString(indextable,
+ it[j].pi_name,
+ Py_BuildValue("K", it[j].pi_value));
+ }
+ } else {
+ Py_INCREF(Py_None);
+ indextable = Py_None;
+ }
+
+ tuple = Py_BuildValue("sissKsissiiO",
+ p->pd_name, p->pd_propnum, typetable[p->pd_proptype],
+ p->pd_strdefault, p->pd_numdefault,
+ attrtable[p->pd_attr], p->pd_types,
+ p->pd_values, p->pd_colname,
+ p->pd_rightalign, p->pd_visible, indextable);
+ PyDict_SetItemString(d, p->pd_name, tuple);
+ Py_DECREF(tuple);
+ }
+
+ return (d);
+}
+
+static PyMethodDef zfsmethods[] = {
+ {"next_dataset", py_next_dataset, METH_VARARGS,
+ "Get next child dataset or snapshot."},
+ {"get_fsacl", py_get_fsacl, METH_VARARGS, "Get allowed permissions."},
+ {"set_fsacl", py_set_fsacl, METH_VARARGS, "Set allowed permissions."},
+ {"userspace_many", py_userspace_many, METH_VARARGS,
+ "Get user space accounting."},
+ {"userspace_upgrade", py_userspace_upgrade, METH_VARARGS,
+ "Upgrade fs to enable user space accounting."},
+ {"set_cmdstr", py_set_cmdstr, METH_VARARGS,
+ "Set command string for history logging."},
+ {"dataset_props", py_dataset_props, METH_VARARGS,
+ "Get dataset properties."},
+ {"get_proptable", py_get_proptable, METH_NOARGS,
+ "Get property table."},
+ /* Below are not really zfs-specific: */
+ {"sid_to_id", py_sid_to_id, METH_VARARGS, "Map SID to UID/GID."},
+ {"sid_to_name", py_sid_to_name, METH_VARARGS,
+ "Map SID to name@domain."},
+ {"isglobalzone", py_isglobalzone, METH_NOARGS,
+ "Determine if this is the global zone."},
+ {NULL, NULL, 0, NULL}
+};
+
+void
+initioctl(void)
+{
+ PyObject *zfs_ioctl = Py_InitModule("zfs.ioctl", zfsmethods);
+ PyObject *zfs_util = PyImport_ImportModule("zfs.util");
+ PyObject *devfile;
+
+ if (zfs_util == NULL)
+ return;
+
+ ZFSError = PyObject_GetAttrString(zfs_util, "ZFSError");
+ devfile = PyObject_GetAttrString(zfs_util, "dev");
+ zfsdevfd = PyObject_AsFileDescriptor(devfile);
+
+ zfs_prop_init();
+}
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py b/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py
new file mode 100644
index 000000000000..1458dc1328fd
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py
@@ -0,0 +1,28 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+import zfs.allow
+
+do_unallow = zfs.allow.do_allow
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py b/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py
new file mode 100644
index 000000000000..c269d51e1db7
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py
@@ -0,0 +1,277 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module implements the "zfs userspace" and "zfs groupspace" subcommands.
+The only public interface is the zfs.userspace.do_userspace() function."""
+
+import zfs.util
+import zfs.ioctl
+import zfs.dataset
+import optparse
+import sys
+import pwd
+import grp
+import errno
+
+_ = zfs.util._
+
+# map from property name prefix -> (field name, isgroup)
+props = {
+ "userused@": ("used", False),
+ "userquota@": ("quota", False),
+ "groupused@": ("used", True),
+ "groupquota@": ("quota", True),
+}
+
+def skiptype(options, prop):
+ """Return True if this property (eg "userquota@") should be skipped."""
+ (field, isgroup) = props[prop]
+ if field not in options.fields:
+ return True
+ if isgroup and "posixgroup" not in options.types and \
+ "smbgroup" not in options.types:
+ return True
+ if not isgroup and "posixuser" not in options.types and \
+ "smbuser" not in options.types:
+ return True
+ return False
+
+def updatemax(d, k, v):
+ d[k] = max(d.get(k, None), v)
+
+def new_entry(options, isgroup, domain, rid):
+ """Return a dict("field": value) for this domain (string) + rid (int)"""
+
+ if domain:
+ idstr = "%s-%u" % (domain, rid)
+ else:
+ idstr = "%u" % rid
+
+ (typename, mapfunc) = {
+ (1, 1): ("SMB Group", lambda id: zfs.ioctl.sid_to_name(id, 0)),
+ (1, 0): ("POSIX Group", lambda id: grp.getgrgid(int(id)).gr_name),
+ (0, 1): ("SMB User", lambda id: zfs.ioctl.sid_to_name(id, 1)),
+ (0, 0): ("POSIX User", lambda id: pwd.getpwuid(int(id)).pw_name)
+ }[isgroup, bool(domain)]
+
+ if typename.lower().replace(" ", "") not in options.types:
+ return None
+
+ v = dict()
+ v["type"] = typename
+
+ # python's getpwuid/getgrgid is confused by ephemeral uids
+ if not options.noname and rid < 1<<31:
+ try:
+ v["name"] = mapfunc(idstr)
+ except KeyError:
+ pass
+
+ if "name" not in v:
+ v["name"] = idstr
+ if not domain:
+ # it's just a number, so pad it with spaces so
+ # that it will sort numerically
+ v["name.sort"] = "%20d" % rid
+ # fill in default values
+ v["used"] = "0"
+ v["used.sort"] = 0
+ v["quota"] = "none"
+ v["quota.sort"] = 0
+ return v
+
+def process_one_raw(acct, maxfieldlen, options, prop, elem):
+ """Update the acct and maxfieldlen dicts to incorporate the
+ information from this elem from Dataset.userspace(prop)."""
+
+ (domain, rid, value) = elem
+ (field, isgroup) = props[prop]
+
+ if options.translate and domain:
+ try:
+ rid = zfs.ioctl.sid_to_id("%s-%u" % (domain, rid),
+ not isgroup)
+ domain = None
+ except KeyError:
+ pass;
+ key = (isgroup, domain, rid)
+
+ try:
+ v = acct[key]
+ except KeyError:
+ v = new_entry(options, isgroup, domain, rid)
+ if not v:
+ return
+ acct[key] = v
+
+ # Add our value to an existing value, which may be present if
+ # options.translate is set.
+ value = v[field + ".sort"] = value + v[field + ".sort"]
+
+ if options.parsable:
+ v[field] = str(value)
+ else:
+ v[field] = zfs.util.nicenum(value)
+ for k in v.keys():
+ # some of the .sort fields are integers, so have no len()
+ if isinstance(v[k], str):
+ updatemax(maxfieldlen, k, len(v[k]))
+
+def do_userspace():
+ """Implements the "zfs userspace" and "zfs groupspace" subcommands."""
+
+ def usage(msg=None):
+ parser.print_help()
+ if msg:
+ print
+ parser.exit("zfs: error: " + msg)
+ else:
+ parser.exit()
+
+ if sys.argv[1] == "userspace":
+ defaulttypes = "posixuser,smbuser"
+ else:
+ defaulttypes = "posixgroup,smbgroup"
+
+ fields = ("type", "name", "used", "quota")
+ ljustfields = ("type", "name")
+ types = ("all", "posixuser", "smbuser", "posixgroup", "smbgroup")
+
+ u = _("%s [-niHp] [-o field[,...]] [-sS field] ... \n") % sys.argv[1]
+ u += _(" [-t type[,...]] <filesystem|snapshot>")
+ parser = optparse.OptionParser(usage=u, prog="zfs")
+
+ parser.add_option("-n", action="store_true", dest="noname",
+ help=_("Print numeric ID instead of user/group name"))
+ parser.add_option("-i", action="store_true", dest="translate",
+ help=_("translate SID to posix (possibly ephemeral) ID"))
+ parser.add_option("-H", action="store_true", dest="noheaders",
+ help=_("no headers, tab delimited output"))
+ parser.add_option("-p", action="store_true", dest="parsable",
+ help=_("exact (parsable) numeric output"))
+ parser.add_option("-o", dest="fields", metavar="field[,...]",
+ default="type,name,used,quota",
+ help=_("print only these fields (eg type,name,used,quota)"))
+ parser.add_option("-s", dest="sortfields", metavar="field",
+ type="choice", choices=fields, default=list(),
+ action="callback", callback=zfs.util.append_with_opt,
+ help=_("sort field"))
+ parser.add_option("-S", dest="sortfields", metavar="field",
+ type="choice", choices=fields, #-s sets the default
+ action="callback", callback=zfs.util.append_with_opt,
+ help=_("reverse sort field"))
+ parser.add_option("-t", dest="types", metavar="type[,...]",
+ default=defaulttypes,
+ help=_("print only these types (eg posixuser,smbuser,posixgroup,smbgroup,all)"))
+
+ (options, args) = parser.parse_args(sys.argv[2:])
+ if len(args) != 1:
+ usage(_("wrong number of arguments"))
+ dsname = args[0]
+
+ options.fields = options.fields.split(",")
+ for f in options.fields:
+ if f not in fields:
+ usage(_("invalid field %s") % f)
+
+ options.types = options.types.split(",")
+ for t in options.types:
+ if t not in types:
+ usage(_("invalid type %s") % t)
+
+ if not options.sortfields:
+ options.sortfields = [("-s", "type"), ("-s", "name")]
+
+ if "all" in options.types:
+ options.types = types[1:]
+
+ ds = zfs.dataset.Dataset(dsname, types=("filesystem"))
+
+ if ds.getprop("jailed") and zfs.ioctl.isglobalzone():
+ options.noname = True
+
+ if not ds.getprop("useraccounting"):
+ print(_("Initializing accounting information on old filesystem, please wait..."))
+ ds.userspace_upgrade()
+
+ acct = dict()
+ maxfieldlen = dict()
+
+ # gather and process accounting information
+ for prop in props.keys():
+ if skiptype(options, prop):
+ continue;
+ for elem in ds.userspace(prop):
+ process_one_raw(acct, maxfieldlen, options, prop, elem)
+
+ # print out headers
+ if not options.noheaders:
+ line = str()
+ for field in options.fields:
+ # make sure the field header will fit
+ updatemax(maxfieldlen, field, len(field))
+
+ if field in ljustfields:
+ fmt = "%-*s "
+ else:
+ fmt = "%*s "
+ line += fmt % (maxfieldlen[field], field.upper())
+ print(line)
+
+ # custom sorting func
+ def cmpkey(val):
+ l = list()
+ for (opt, field) in options.sortfields:
+ try:
+ n = val[field + ".sort"]
+ except KeyError:
+ n = val[field]
+ if opt == "-S":
+ # reverse sorting
+ try:
+ n = -n
+ except TypeError:
+ # it's a string; decompose it
+ # into an array of integers,
+ # each one the negative of that
+ # character
+ n = [-ord(c) for c in n]
+ l.append(n)
+ return l
+
+ # print out data lines
+ for val in sorted(acct.itervalues(), key=cmpkey):
+ line = str()
+ for field in options.fields:
+ if options.noheaders:
+ line += val[field]
+ line += "\t"
+ else:
+ if field in ljustfields:
+ fmt = "%-*s "
+ else:
+ fmt = "%*s "
+ line += fmt % (maxfieldlen[field], val[field])
+ print(line)
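The cmpkey() helper above builds one sort key per -s/-S option; for reverse sorts it negates numeric keys directly and turns strings into lists of negated character codes, so that an ordinary ascending sort yields descending order. A self-contained illustration of that string trick:

    names = ["alice", "bob", "carol"]
    print(sorted(names, key=lambda s: [-ord(c) for c in s]))
    # -> ['carol', 'bob', 'alice']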
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/util.py b/cddl/contrib/opensolaris/lib/pyzfs/common/util.py
new file mode 100644
index 000000000000..14d05a8bc12f
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/util.py
@@ -0,0 +1,138 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module provides utility functions for ZFS.
+zfs.util.dev -- a file object of /dev/zfs """
+
+import gettext
+import errno
+import os
+# Note: this module (zfs.util) should not import zfs.ioctl, because that
+# would introduce a circular dependency
+
+errno.ECANCELED = 47
+errno.ENOTSUP = 48
+
+dev = open("/dev/zfs", "w")
+
+_ = gettext.translation("SUNW_OST_OSLIB", "/usr/lib/locale",
+ fallback=True).gettext
+
+def default_repr(self):
+ """A simple __repr__ function."""
+ if self.__slots__:
+ str = "<" + self.__class__.__name__
+ for v in self.__slots__:
+ str += " %s: %r" % (v, getattr(self, v))
+ return str + ">"
+ else:
+ return "<%s %s>" % \
+ (self.__class__.__name__, repr(self.__dict__))
+
+class ZFSError(StandardError):
+ """This exception class represents a potentially user-visible
+ ZFS error. If uncaught, it will be printed and the process will
+ exit with exit code 1.
+
+ errno -- the error number (eg, from ioctl(2))."""
+
+ __slots__ = "why", "task", "errno"
+ __repr__ = default_repr
+
+ def __init__(self, eno, task=None, why=None):
+ """Create a ZFS exception.
+ eno -- the error number (errno)
+ task -- a string describing the task that failed
+ why -- a string describing why it failed (defaults to
+ strerror(eno))"""
+
+ self.errno = eno
+ self.task = task
+ self.why = why
+
+ def __str__(self):
+ s = ""
+ if self.task:
+ s += self.task + ": "
+ if self.why:
+ s += self.why
+ else:
+ s += self.strerror
+ return s
+
+ __strs = {
+ errno.EPERM: _("permission denied"),
+ errno.ECANCELED:
+ _("delegated administration is disabled on pool"),
+ errno.EINTR: _("signal received"),
+ errno.EIO: _("I/O error"),
+ errno.ENOENT: _("dataset does not exist"),
+ errno.ENOSPC: _("out of space"),
+ errno.EEXIST: _("dataset already exists"),
+ errno.EBUSY: _("dataset is busy"),
+ errno.EROFS:
+ _("snapshot permissions cannot be modified"),
+ errno.ENAMETOOLONG: _("dataset name is too long"),
+ errno.ENOTSUP: _("unsupported version"),
+ errno.EAGAIN: _("pool I/O is currently suspended"),
+ }
+
+ __strs[errno.EACCES] = __strs[errno.EPERM]
+ __strs[errno.ENXIO] = __strs[errno.EIO]
+ __strs[errno.ENODEV] = __strs[errno.EIO]
+ __strs[errno.EDQUOT] = __strs[errno.ENOSPC]
+
+ @property
+ def strerror(self):
+ return ZFSError.__strs.get(self.errno, os.strerror(self.errno))
+
+def nicenum(num):
+ """Return a nice string (eg "1.23M") for this integer."""
+ index = 0
+ n = num
+
+ while n >= 1024:
+ n /= 1024
+ index += 1
+
+ u = " KMGTPE"[index]
+ if index == 0:
+ return "%u" % n;
+ elif n >= 100 or num & ((1024*index)-1) == 0:
+ # it's an exact multiple of its index, or it wouldn't
+ # fit as floating point, so print as an integer
+ return "%u%c" % (n, u)
+ else:
+ # due to rounding, it's tricky to tell what precision to
+ # use; try each precision and see which one fits
+ for i in (2, 1, 0):
+ s = "%.*f%c" % (i, float(num) / (1<<(10*index)), u)
+ if len(s) <= 5:
+ return s
+
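A few sample values for the nicenum() helper above, chosen so they do not depend on the exact-power-of-1024 branch:

    print(nicenum(0))           # "0"
    print(nicenum(123))         # "123"
    print(nicenum(1536))        # "1.50K"
    print(nicenum(123456789))   # "117M"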
+def append_with_opt(option, opt, value, parser):
+ """A function for OptionParser which appends a tuple (opt, value)."""
+ getattr(parser.values, option.dest).append((opt, value))
+
diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h
index 04c74a31b874..245e01b5da31 100644
--- a/sys/cddl/boot/zfs/zfsimpl.h
+++ b/sys/cddl/boot/zfs/zfsimpl.h
@@ -49,7 +49,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -317,8 +317,9 @@ typedef struct zio_block_tail {
zio_cksum_t zbt_cksum; /* 256-bit checksum */
} zio_block_tail_t;
-#define VDEV_SKIP_SIZE (8 << 10)
-#define VDEV_BOOT_HEADER_SIZE (8 << 10)
+#define VDEV_PAD_SIZE (8 << 10)
+/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
+#define VDEV_SKIP_SIZE (VDEV_PAD_SIZE * 2)
#define VDEV_PHYS_SIZE (112 << 10)
#define VDEV_UBERBLOCK_RING (128 << 10)
@@ -330,26 +331,14 @@ typedef struct zio_block_tail {
offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
-/* ZFS boot block */
-#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
-#define VDEV_BOOT_VERSION 1 /* version number */
-
-typedef struct vdev_boot_header {
- uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
- uint64_t vb_version; /* VDEV_BOOT_VERSION */
- uint64_t vb_offset; /* start offset (bytes) */
- uint64_t vb_size; /* size (bytes) */
- char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
-} vdev_boot_header_t;
-
typedef struct vdev_phys {
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
zio_block_tail_t vp_zbt;
} vdev_phys_t;
typedef struct vdev_label {
- char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
- vdev_boot_header_t vl_boot_header; /* 8K */
+ char vl_pad1[VDEV_PAD_SIZE]; /* 8K */
+ char vl_pad2[VDEV_PAD_SIZE]; /* 8K */
vdev_phys_t vl_vdev_phys; /* 112K */
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
} vdev_label_t; /* 256K total */
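With the boot header gone, the label layout is two 8K pad areas, the 112K packed-nvlist area, and the 128K uberblock ring, which still adds up to the 256K label size noted in the trailing comment. A quick arithmetic check in Python:

    VDEV_PAD_SIZE = 8 << 10
    VDEV_PHYS_SIZE = 112 << 10
    VDEV_UBERBLOCK_RING = 128 << 10

    total = 2 * VDEV_PAD_SIZE + VDEV_PHYS_SIZE + VDEV_UBERBLOCK_RING
    assert total == 256 << 10   # 256K per label, as before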
@@ -480,13 +469,14 @@ typedef enum {
#define SPA_VERSION_12 12ULL
#define SPA_VERSION_13 13ULL
#define SPA_VERSION_14 14ULL
+#define SPA_VERSION_15 15ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understand the on-disk
* format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
* and do the appropriate changes.
*/
-#define SPA_VERSION SPA_VERSION_14
-#define SPA_VERSION_STRING "14"
+#define SPA_VERSION SPA_VERSION_15
+#define SPA_VERSION_STRING "15"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -522,6 +512,7 @@ typedef enum {
#define SPA_VERSION_SNAP_PROPS SPA_VERSION_12
#define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13
#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14
+#define SPA_VERSION_USERSPACE SPA_VERSION_15
/*
* The following are configuration names used in the nvlist describing a pool's
@@ -799,8 +790,11 @@ typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
- char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
- sizeof (uint64_t)];
+ uint64_t os_flags;
+ char os_pad[2048 - sizeof (dnode_phys_t)*3 -
+ sizeof (zil_header_t) - sizeof (uint64_t)*2];
+ dnode_phys_t os_userused_dnode;
+ dnode_phys_t os_groupused_dnode;
} objset_phys_t;
typedef struct dsl_dir_phys {
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c b/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
index 865fba337f5a..c6347c251581 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
@@ -239,9 +239,8 @@ secpolicy_vnode_create_gid(struct ucred *cred)
}
int
-secpolicy_vnode_setids_setgids(struct vnode *vp, struct ucred *cred, gid_t gid)
+secpolicy_vnode_setids_setgids(vnode_t *vp, struct ucred *cred, gid_t gid)
{
-
if (groupmember(gid, cred))
return (0);
if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
@@ -366,3 +365,10 @@ secpolicy_xvattr(struct vnode *vp, xvattr_t *xvap, uid_t owner, cred_t *cr,
return (0);
return (priv_check_cred(cr, PRIV_VFS_SYSFLAGS, 0));
}
+
+int
+secpolicy_smb(cred_t *cr)
+{
+
+ return (priv_check_cred(cr, PRIV_NETSMB, 0));
+}
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_uio.c b/sys/cddl/compat/opensolaris/kern/opensolaris_uio.c
new file mode 100644
index 000000000000..c319f6280da5
--- /dev/null
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_uio.c
@@ -0,0 +1,112 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#include <sys/types.h>
+#include <sys/uio.h>
+
+/*
+ * same as uiomove() but doesn't modify uio structure.
+ * return in cbytes how many bytes were copied.
+ */
+int
+uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
+{
+ struct iovec *iov;
+ ulong_t cnt;
+ int error, iovcnt;
+
+ iovcnt = uio->uio_iovcnt;
+ *cbytes = 0;
+
+ for (iov = uio->uio_iov; n > 0 && iovcnt > 0; iov++, iovcnt--) {
+ cnt = MIN(iov->iov_len, n);
+ if (cnt == 0)
+ continue;
+
+ switch (uio->uio_segflg) {
+ case UIO_USERSPACE:
+ if (rw == UIO_READ)
+ error = copyout(p, iov->iov_base, cnt);
+ else
+ error = copyin(iov->iov_base, p, cnt);
+ if (error)
+ return (error);
+ break;
+ case UIO_SYSSPACE:
+ if (uio->uio_rw == UIO_READ)
+ bcopy(p, iov->iov_base, cnt);
+ else
+ bcopy(iov->iov_base, p, cnt);
+ break;
+ }
+
+ p = (caddr_t)p + cnt;
+ n -= cnt;
+ *cbytes += cnt;
+ }
+ return (0);
+}
+
+/*
+ * Drop the next n chars out of *uiop.
+ */
+void
+uioskip(uio_t *uiop, size_t n)
+{
+ if (n > uiop->uio_resid)
+ return;
+ while (n != 0) {
+ register iovec_t *iovp = uiop->uio_iov;
+ register size_t niovb = MIN(iovp->iov_len, n);
+
+ if (niovb == 0) {
+ uiop->uio_iov++;
+ uiop->uio_iovcnt--;
+ continue;
+ }
+ iovp->iov_base += niovb;
+ uiop->uio_loffset += niovb;
+ iovp->iov_len -= niovb;
+ uiop->uio_resid -= niovb;
+ n -= niovb;
+ }
+}
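uioskip() simply walks the iovec array consuming n bytes: empty iovecs are stepped over, partially consumed ones have their base advanced and their length and the overall residual count reduced, and nothing happens at all if n exceeds uio_resid. A small Python model of the same bookkeeping (offsets and lengths only, no data copying):

    def uioskip_model(iovecs, resid, n):
        # iovecs is a list of (offset, length) pairs; returns the updated
        # list and residual count, mimicking uioskip() above (sketch only).
        if n > resid:
            return iovecs, resid
        out = list(iovecs)
        i = 0
        while n:
            off, length = out[i]
            if length == 0:
                i += 1                    # exhausted iovec, move to the next
                continue
            step = min(length, n)
            out[i] = (off + step, length - step)
            resid -= step
            n -= step
        return out[i:], resid

    # uioskip_model([(0, 100), (0, 50)], 150, 120) -> ([(20, 30)], 30)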
diff --git a/sys/cddl/compat/opensolaris/sys/misc.h b/sys/cddl/compat/opensolaris/sys/misc.h
index 8e1a637a3b68..0343f2f959bd 100644
--- a/sys/cddl/compat/opensolaris/sys/misc.h
+++ b/sys/cddl/compat/opensolaris/sys/misc.h
@@ -43,10 +43,13 @@
#define _FIO_SEEK_DATA FIOSEEKDATA
#define _FIO_SEEK_HOLE FIOSEEKHOLE
+#ifdef _KERNEL
struct opensolaris_utsname {
char *nodename;
};
extern char hw_serial[11];
extern struct opensolaris_utsname utsname;
+#endif
+
#endif /* _OPENSOLARIS_SYS_MISC_H_ */
diff --git a/sys/cddl/compat/opensolaris/sys/policy.h b/sys/cddl/compat/opensolaris/sys/policy.h
index 6731d7cbcd4c..9fd2092fd7de 100644
--- a/sys/cddl/compat/opensolaris/sys/policy.h
+++ b/sys/cddl/compat/opensolaris/sys/policy.h
@@ -72,6 +72,7 @@ int secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp);
void secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp);
int secpolicy_xvattr(struct vnode *vp, xvattr_t *xvap, uid_t owner,
cred_t *cr, vtype_t vtype);
+int secpolicy_smb(cred_t *cr);
#endif /* _KERNEL */
diff --git a/sys/cddl/compat/opensolaris/sys/sid.h b/sys/cddl/compat/opensolaris/sys/sid.h
index eb8d0bed3eeb..d48b1dffff70 100644
--- a/sys/cddl/compat/opensolaris/sys/sid.h
+++ b/sys/cddl/compat/opensolaris/sys/sid.h
@@ -51,4 +51,11 @@ ksiddomain_rele(ksiddomain_t *kd)
kmem_free(kd, sizeof(*kd));
}
+static __inline int
+ksid_getid(void *ksid)
+{
+
+ panic("%s has been unexpectedly called", __func__);
+}
+
#endif /* _OPENSOLARIS_SYS_SID_H_ */
diff --git a/sys/cddl/compat/opensolaris/sys/uio.h b/sys/cddl/compat/opensolaris/sys/uio.h
index 9e53457baf2b..c3fa0bcbf015 100644
--- a/sys/cddl/compat/opensolaris/sys/uio.h
+++ b/sys/cddl/compat/opensolaris/sys/uio.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -60,6 +60,9 @@ zfs_uiomove(void *cp, size_t n, enum uio_rw dir, uio_t *uio)
return (uiomove(cp, (int)n, uio));
}
#define uiomove(cp, n, dir, uio) zfs_uiomove((cp), (n), (dir), (uio))
+
+int uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes);
+void uioskip(uio_t *uiop, size_t n);
#endif /* BUILDING_ZFS */
#endif /* !_OPENSOLARIS_SYS_UIO_H_ */
diff --git a/sys/cddl/compat/opensolaris/sys/vnode.h b/sys/cddl/compat/opensolaris/sys/vnode.h
index 7296635cc15b..1d46956629e5 100644
--- a/sys/cddl/compat/opensolaris/sys/vnode.h
+++ b/sys/cddl/compat/opensolaris/sys/vnode.h
@@ -49,6 +49,7 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
#include <sys/syscallsubr.h>
typedef struct vop_vector vnodeops_t;
+#define VOP_FID VOP_VPTOFH
#define vop_fid vop_vptofh
#define vop_fid_args vop_vptofh_args
#define a_fid a_fhp
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c
index 0fd5800a84dc..2964cae5db8e 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c
@@ -19,13 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/sunddi.h>
@@ -66,6 +63,10 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
+ {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
+ {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
+ {ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
+ {ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
{NULL, ZFS_DELEG_NOTE_NONE }
};
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h
index 561b73e63df4..cdbbd83de07e 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_DELEG_H
#define _ZFS_DELEG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/fs/zfs.h>
#ifdef __cplusplus
@@ -59,6 +57,10 @@ typedef enum {
ZFS_DELEG_NOTE_USERPROP,
ZFS_DELEG_NOTE_MOUNT,
ZFS_DELEG_NOTE_SHARE,
+ ZFS_DELEG_NOTE_USERQUOTA,
+ ZFS_DELEG_NOTE_GROUPQUOTA,
+ ZFS_DELEG_NOTE_USERUSED,
+ ZFS_DELEG_NOTE_GROUPUSED,
ZFS_DELEG_NOTE_NONE
} zfs_deleg_note_t;
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c
index a9d109be20ab..45730c6fc4bd 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Common name validation routines for ZFS. These routines are shared by the
* userland code as well as the ioctl() layer to ensure that we don't
@@ -345,19 +343,3 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
return (0);
}
-
-/*
- * Check if the dataset name is private for internal usage.
- * '$' is reserved for internal dataset names. e.g. "$MOS"
- *
- * Return 1 if the given name is used internally.
- * Return 0 if it is not.
- */
-int
-dataset_name_hidden(const char *name)
-{
- if (strchr(name, '$') != NULL)
- return (1);
-
- return (0);
-}
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h
index ec85e62f72e8..7711da099be9 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_NAMECHECK_H
#define _ZFS_NAMECHECK_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -50,7 +48,6 @@ typedef enum {
int pool_namecheck(const char *, namecheck_err_t *, char *);
int dataset_namecheck(const char *, namecheck_err_t *, char *);
int mountpoint_namecheck(const char *, namecheck_err_t *);
-int dataset_name_hidden(const char *);
int snapshot_namecheck(const char *, namecheck_err_t *, char *);
int permset_namecheck(const char *, namecheck_err_t *, char *);
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
index 70c08adc78a0..fa98192aa50e 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -43,6 +43,14 @@
static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS];
+/* Note: this is indexed by zfs_userquota_prop_t; keep the order the same. */
+const char *zfs_userquota_prop_prefixes[] = {
+ "userused@",
+ "userquota@",
+ "groupused@",
+ "groupquota@"
+};
+
zprop_desc_t *
zfs_prop_get_table(void)
{
@@ -133,6 +141,7 @@ zfs_prop_init(void)
{ "1", 1 },
{ "2", 2 },
{ "3", 3 },
+ { "4", 4 },
{ "current", ZPL_VERSION },
{ NULL }
};
@@ -218,7 +227,7 @@ zfs_prop_init(void)
/* default index properties */
register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "1 | 2 | 3 | current", "VERSION", version_table);
+ "1 | 2 | 3 | 4 | current", "VERSION", version_table);
register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
"CANMOUNT", canmount_table);
@@ -307,6 +316,8 @@ zfs_prop_init(void)
PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY,
ZFS_TYPE_DATASET, "GUID");
+ register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
+ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, NULL);
/* oddball properties */
register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL,
@@ -330,7 +341,6 @@ zfs_name_to_prop(const char *propname)
return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET));
}
-
/*
* For user property names, we allow all lowercase alphanumeric characters, plus
* a few useful punctuation characters.
@@ -368,6 +378,26 @@ zfs_prop_user(const char *name)
}
/*
+ * Returns true if this is a valid userspace-type property (one with a '@').
+ * Note that after the @, any character is valid (eg, another @, for SID
+ * user@domain).
+ */
+boolean_t
+zfs_prop_userquota(const char *name)
+{
+ zfs_userquota_prop_t prop;
+
+ for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) {
+ if (strncmp(name, zfs_userquota_prop_prefixes[prop],
+ strlen(zfs_userquota_prop_prefixes[prop])) == 0) {
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
+/*
* Tables of index types, plus functions to convert between the user view
* (strings) and internal representation (uint64_t).
*/
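
As a concrete illustration of the prefix match in zfs_prop_userquota() above (the property names are made up, not from the patch): anything beginning with one of the four prefixes in zfs_userquota_prop_prefixes[] is accepted, and everything after the '@' is left for later parsing, which is what allows SID-style names containing a second '@'.

/*
 * Illustrative userland checks; assumes <assert.h> plus the headers
 * that declare zfs_prop_userquota().
 */
static void
userquota_name_examples(void)
{
	assert(zfs_prop_userquota("userquota@alice"));
	assert(zfs_prop_userquota("groupused@staff"));
	assert(zfs_prop_userquota("userused@alice@example.com"));	/* SID-style */
	assert(!zfs_prop_userquota("compression"));
	assert(!zfs_prop_userquota("com.example:note"));	/* user property */
}
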
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c
index 87619e1cbf07..d3301b508029 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Common routines used by zfs and zpool property management.
*/
@@ -205,9 +203,6 @@ propname_match(const char *p, size_t len, zprop_desc_t *prop_entry)
#ifndef _KERNEL
const char *colname = prop_entry->pd_colname;
int c;
-
- if (colname == NULL)
- return (B_FALSE);
#endif
if (len == strlen(propname) &&
@@ -215,7 +210,7 @@ propname_match(const char *p, size_t len, zprop_desc_t *prop_entry)
return (B_TRUE);
#ifndef _KERNEL
- if (len != strlen(colname))
+ if (colname == NULL || len != strlen(colname))
return (B_FALSE);
for (c = 0; c < len; c++)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index 860b33c3ee76..2813924ef710 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -462,6 +462,7 @@ static arc_state_t *arc_l2c_only;
static int arc_no_grow; /* Don't try to grow cache size */
static uint64_t arc_tempreserve;
+static uint64_t arc_loaned_bytes;
static uint64_t arc_meta_used;
static uint64_t arc_meta_limit;
static uint64_t arc_meta_max = 0;
@@ -511,7 +512,7 @@ struct arc_buf_hdr {
/* immutable */
arc_buf_contents_t b_type;
uint64_t b_size;
- spa_t *b_spa;
+ uint64_t b_spa;
/* protected by arc state mutex */
arc_state_t *b_state;
@@ -533,9 +534,9 @@ static arc_buf_hdr_t arc_eviction_hdr;
static void arc_get_data_buf(arc_buf_t *buf);
static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
static int arc_evict_needed(arc_buf_contents_t type);
-static void arc_evict_ghost(arc_state_t *state, spa_t *spa, int64_t bytes);
+static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes);
-static boolean_t l2arc_write_eligible(spa_t *spa, arc_buf_hdr_t *ab);
+static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
#define GHOST_STATE(state) \
((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \
@@ -761,9 +762,8 @@ static void l2arc_hdr_stat_add(void);
static void l2arc_hdr_stat_remove(void);
static uint64_t
-buf_hash(spa_t *spa, const dva_t *dva, uint64_t birth)
+buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
{
- uintptr_t spav = (uintptr_t)spa;
uint8_t *vdva = (uint8_t *)dva;
uint64_t crc = -1ULL;
int i;
@@ -773,7 +773,7 @@ buf_hash(spa_t *spa, const dva_t *dva, uint64_t birth)
for (i = 0; i < sizeof (dva_t); i++)
crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF];
- crc ^= (spav>>8) ^ birth;
+ crc ^= (spa>>8) ^ birth;
return (crc);
}
@@ -789,7 +789,7 @@ buf_hash(spa_t *spa, const dva_t *dva, uint64_t birth)
((buf)->b_birth == birth) && ((buf)->b_spa == spa)
static arc_buf_hdr_t *
-buf_hash_find(spa_t *spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
+buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
{
uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
@@ -1345,7 +1345,7 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
ASSERT(BUF_EMPTY(hdr));
hdr->b_size = size;
hdr->b_type = type;
- hdr->b_spa = spa;
+ hdr->b_spa = spa_guid(spa);
hdr->b_state = arc_anon;
hdr->b_arc_access = 0;
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
@@ -1364,6 +1364,41 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
return (buf);
}
+static char *arc_onloan_tag = "onloan";
+
+/*
+ * Loan out an anonymous arc buffer. Loaned buffers are not counted as in
+ * flight data by arc_tempreserve_space() until they are "returned". Loaned
+ * buffers must be returned to the arc before they can be used by the DMU or
+ * freed.
+ */
+arc_buf_t *
+arc_loan_buf(spa_t *spa, int size)
+{
+ arc_buf_t *buf;
+
+ buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA);
+
+ atomic_add_64(&arc_loaned_bytes, size);
+ return (buf);
+}
+
+/*
+ * Return a loaned arc buffer to the arc.
+ */
+void
+arc_return_buf(arc_buf_t *buf, void *tag)
+{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ ASSERT(hdr->b_state == arc_anon);
+ ASSERT(buf->b_data != NULL);
+ VERIFY(refcount_remove(&hdr->b_refcnt, arc_onloan_tag) == 0);
+ VERIFY(refcount_add(&hdr->b_refcnt, tag) == 1);
+
+ atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
+}
+
static arc_buf_t *
arc_buf_clone(arc_buf_t *from)
{
@@ -1661,7 +1696,7 @@ arc_buf_size(arc_buf_t *buf)
* It may also return without evicting as much space as requested.
*/
static void *
-arc_evict(arc_state_t *state, spa_t *spa, int64_t bytes, boolean_t recycle,
+arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
arc_buf_contents_t type)
{
arc_state_t *evicted_state;
@@ -1830,12 +1865,12 @@ evict_start:
if (mru_over > 0 && arc_mru_ghost->arcs_lsize[type] > 0) {
int64_t todelete =
MIN(arc_mru_ghost->arcs_lsize[type], mru_over);
- arc_evict_ghost(arc_mru_ghost, NULL, todelete);
+ arc_evict_ghost(arc_mru_ghost, 0, todelete);
} else if (arc_mfu_ghost->arcs_lsize[type] > 0) {
int64_t todelete = MIN(arc_mfu_ghost->arcs_lsize[type],
arc_mru_ghost->arcs_size +
arc_mfu_ghost->arcs_size - arc_c);
- arc_evict_ghost(arc_mfu_ghost, NULL, todelete);
+ arc_evict_ghost(arc_mfu_ghost, 0, todelete);
}
}
if (stolen)
@@ -1849,7 +1884,7 @@ evict_start:
* bytes. Destroy the buffers that are removed.
*/
static void
-arc_evict_ghost(arc_state_t *state, spa_t *spa, int64_t bytes)
+arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes)
{
arc_buf_hdr_t *ab, *ab_prev;
list_t *list, *list_start;
@@ -1955,13 +1990,13 @@ arc_adjust(void)
if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_DATA] > 0) {
delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_DATA], adjustment);
- (void) arc_evict(arc_mru, NULL, delta, FALSE, ARC_BUFC_DATA);
+ (void) arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_DATA);
adjustment -= delta;
}
if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment);
- (void) arc_evict(arc_mru, NULL, delta, FALSE,
+ (void) arc_evict(arc_mru, 0, delta, FALSE,
ARC_BUFC_METADATA);
}
@@ -1973,14 +2008,14 @@ arc_adjust(void)
if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_DATA] > 0) {
delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_DATA]);
- (void) arc_evict(arc_mfu, NULL, delta, FALSE, ARC_BUFC_DATA);
+ (void) arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_DATA);
adjustment -= delta;
}
if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
int64_t delta = MIN(adjustment,
arc_mfu->arcs_lsize[ARC_BUFC_METADATA]);
- (void) arc_evict(arc_mfu, NULL, delta, FALSE,
+ (void) arc_evict(arc_mfu, 0, delta, FALSE,
ARC_BUFC_METADATA);
}
@@ -1992,7 +2027,7 @@ arc_adjust(void)
if (adjustment > 0 && arc_mru_ghost->arcs_size > 0) {
delta = MIN(arc_mru_ghost->arcs_size, adjustment);
- arc_evict_ghost(arc_mru_ghost, NULL, delta);
+ arc_evict_ghost(arc_mru_ghost, 0, delta);
}
adjustment =
@@ -2000,7 +2035,7 @@ arc_adjust(void)
if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) {
delta = MIN(arc_mfu_ghost->arcs_size, adjustment);
- arc_evict_ghost(arc_mfu_ghost, NULL, delta);
+ arc_evict_ghost(arc_mfu_ghost, 0, delta);
}
}
@@ -2044,29 +2079,34 @@ restart:
void
arc_flush(spa_t *spa)
{
+ uint64_t guid = 0;
+
+ if (spa)
+ guid = spa_guid(spa);
+
while (arc_mru->arcs_lsize[ARC_BUFC_DATA]) {
- (void) arc_evict(arc_mru, spa, -1, FALSE, ARC_BUFC_DATA);
+ (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA);
if (spa)
break;
}
while (arc_mru->arcs_lsize[ARC_BUFC_METADATA]) {
- (void) arc_evict(arc_mru, spa, -1, FALSE, ARC_BUFC_METADATA);
+ (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_METADATA);
if (spa)
break;
}
while (arc_mfu->arcs_lsize[ARC_BUFC_DATA]) {
- (void) arc_evict(arc_mfu, spa, -1, FALSE, ARC_BUFC_DATA);
+ (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_DATA);
if (spa)
break;
}
while (arc_mfu->arcs_lsize[ARC_BUFC_METADATA]) {
- (void) arc_evict(arc_mfu, spa, -1, FALSE, ARC_BUFC_METADATA);
+ (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_METADATA);
if (spa)
break;
}
- arc_evict_ghost(arc_mru_ghost, spa, -1);
- arc_evict_ghost(arc_mfu_ghost, spa, -1);
+ arc_evict_ghost(arc_mru_ghost, guid, -1);
+ arc_evict_ghost(arc_mfu_ghost, guid, -1);
mutex_enter(&arc_reclaim_thr_lock);
arc_do_user_evicts();
@@ -2463,7 +2503,7 @@ arc_get_data_buf(arc_buf_t *buf)
state = (arc_mru->arcs_lsize[type] >= size &&
mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
}
- if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) {
+ if ((buf->b_data = arc_evict(state, 0, size, TRUE, type)) == NULL) {
if (type == ARC_BUFC_METADATA) {
buf->b_data = zio_buf_alloc(size);
arc_space_consume(size, ARC_SPACE_DATA);
@@ -2673,7 +2713,7 @@ arc_read_done(zio_t *zio)
* reason for it not to be found is if we were freed during the
* read.
*/
- found = buf_hash_find(zio->io_spa, &hdr->b_dva, hdr->b_birth,
+ found = buf_hash_find(hdr->b_spa, &hdr->b_dva, hdr->b_birth,
&hash_lock);
ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) ||
@@ -2817,9 +2857,10 @@ arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
arc_buf_t *buf;
kmutex_t *hash_lock;
zio_t *rzio;
+ uint64_t guid = spa_guid(spa);
top:
- hdr = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
+ hdr = buf_hash_find(guid, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
if (hdr && hdr->b_datacnt > 0) {
*arc_flags |= ARC_CACHED;
@@ -2842,7 +2883,7 @@ top:
acb->acb_private = private;
if (pio != NULL)
acb->acb_zio_dummy = zio_null(pio,
- spa, NULL, NULL, zio_flags);
+ spa, NULL, NULL, NULL, zio_flags);
ASSERT(acb->acb_done != NULL);
acb->acb_next = hdr->b_acb;
@@ -3084,9 +3125,10 @@ arc_tryread(spa_t *spa, blkptr_t *bp, void *data)
{
arc_buf_hdr_t *hdr;
kmutex_t *hash_mtx;
+ uint64_t guid = spa_guid(spa);
int rc = 0;
- hdr = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_mtx);
+ hdr = buf_hash_find(guid, BP_IDENTITY(bp), bp->blk_birth, &hash_mtx);
if (hdr && hdr->b_datacnt > 0 && !HDR_IO_IN_PROGRESS(hdr)) {
arc_buf_t *buf = hdr->b_buf;
@@ -3254,7 +3296,7 @@ arc_release(arc_buf_t *buf, void *tag)
arc_buf_hdr_t *nhdr;
arc_buf_t **bufp;
uint64_t blksz = hdr->b_size;
- spa_t *spa = hdr->b_spa;
+ uint64_t spa = hdr->b_spa;
arc_buf_contents_t type = hdr->b_type;
uint32_t flags = hdr->b_flags;
@@ -3539,12 +3581,13 @@ arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
arc_buf_hdr_t *ab;
kmutex_t *hash_lock;
zio_t *zio;
+ uint64_t guid = spa_guid(spa);
/*
* If this buffer is in the cache, release it, so it
* can be re-used.
*/
- ab = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
+ ab = buf_hash_find(guid, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
if (ab != NULL) {
/*
* The checksum of blocks to free is not always
@@ -3607,10 +3650,9 @@ arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
}
static int
-arc_memory_throttle(uint64_t reserve, uint64_t txg)
+arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg)
{
#ifdef _KERNEL
- uint64_t inflight_data = arc_anon->arcs_size;
uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count);
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
@@ -3674,6 +3716,7 @@ int
arc_tempreserve_space(uint64_t reserve, uint64_t txg)
{
int error;
+ uint64_t anon_size;
#ifdef ZFS_DEBUG
/*
@@ -3690,11 +3733,18 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
return (ENOMEM);
/*
+ * Don't count loaned bufs as in flight dirty data to prevent long
+ * network delays from blocking transactions that are ready to be
+ * assigned to a txg.
+ */
+ anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0);
+
+ /*
* Writes will, almost always, require additional memory allocations
* in order to compress/encrypt/etc the data. We therefore need to
* make sure that there is sufficient available memory for this.
*/
- if (error = arc_memory_throttle(reserve, txg))
+ if (error = arc_memory_throttle(reserve, anon_size, txg))
return (error);
/*
@@ -3704,8 +3754,9 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* Note: if two requests come in concurrently, we might let them
* both succeed, when one of them should fail. Not a huge deal.
*/
- if (reserve + arc_tempreserve + arc_anon->arcs_size > arc_c / 2 &&
- arc_anon->arcs_size > arc_c / 4) {
+
+ if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
+ anon_size > arc_c / 4) {
dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
"anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n",
arc_tempreserve>>10,
@@ -3959,6 +4010,8 @@ arc_fini(void)
buf_fini();
+ ASSERT(arc_loaned_bytes == 0);
+
mutex_destroy(&arc_lowmem_lock);
#ifdef _KERNEL
if (arc_event_lowmem != NULL)
@@ -4103,7 +4156,7 @@ arc_fini(void)
*/
static boolean_t
-l2arc_write_eligible(spa_t *spa, arc_buf_hdr_t *ab)
+l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab)
{
/*
* A buffer is *not* eligible for the L2ARC if it:
@@ -4112,7 +4165,7 @@ l2arc_write_eligible(spa_t *spa, arc_buf_hdr_t *ab)
* 3. has an I/O in progress (it may be an incomplete read).
* 4. is flagged not eligible (zfs property).
*/
- if (ab->b_spa != spa) {
+ if (ab->b_spa != spa_guid) {
ARCSTAT_BUMP(arcstat_l2_write_spa_mismatch);
return (B_FALSE);
}
@@ -4399,11 +4452,15 @@ l2arc_read_done(zio_t *zio)
* storage now. If there *is* a waiter, the caller must
* issue the i/o in a context where it's OK to block.
*/
- if (zio->io_waiter == NULL)
- zio_nowait(zio_read(zio->io_parent,
- cb->l2rcb_spa, &cb->l2rcb_bp,
+ if (zio->io_waiter == NULL) {
+ zio_t *pio = zio_unique_parent(zio);
+
+ ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
+
+ zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
buf->b_data, zio->io_size, arc_read_done, buf,
zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
+ }
}
kmem_free(cb, sizeof (l2arc_read_callback_t));
@@ -4600,6 +4657,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
boolean_t have_lock, full;
l2arc_write_callback_t *cb;
zio_t *pio, *wzio;
+ uint64_t guid = spa_guid(spa);
int try;
ASSERT(dev->l2ad_vdev != NULL);
@@ -4661,7 +4719,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
break;
}
- if (!l2arc_write_eligible(spa, ab)) {
+ if (!l2arc_write_eligible(guid, ab)) {
mutex_exit(hash_lock);
continue;
}
@@ -5001,7 +5059,7 @@ l2arc_fini(void)
void
l2arc_start(void)
{
- if (!(spa_mode & FWRITE))
+ if (!(spa_mode_global & FWRITE))
return;
(void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0,
@@ -5011,7 +5069,7 @@ l2arc_start(void)
void
l2arc_stop(void)
{
- if (!(spa_mode & FWRITE))
+ if (!(spa_mode_global & FWRITE))
return;
mutex_enter(&l2arc_feed_thr_lock);
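
To summarize the loaned-buffer additions in this file: arc_loan_buf() hands out an anonymous buffer that is tracked in arc_loaned_bytes (and therefore ignored by arc_tempreserve_space()), and arc_return_buf() moves the hold from the loan tag back to a real owner. The sketch below is illustrative only; the return-and-free sequence mirrors dmu_return_arcbuf() later in this commit, and it assumes a valid spa_t plus the prototypes this change adds to arc.h.

/*
 * Illustrative lifecycle of a loaned ARC buffer.
 */
static void
loaned_buf_lifecycle(spa_t *spa, const void *src, int size)
{
	arc_buf_t *abuf;

	abuf = arc_loan_buf(spa, size);		/* counted in arc_loaned_bytes */
	bcopy(src, abuf->b_data, size);

	/*
	 * Normally the buffer is handed to dbuf_assign_arcbuf() or
	 * dmu_assign_arcbuf(), which call arc_return_buf() with the
	 * dbuf as the new tag.  If the write is abandoned instead,
	 * settle the loan by hand:
	 */
	arc_return_buf(abuf, FTAG);
	VERIFY(arc_buf_remove_ref(abuf, FTAG) == 1);
}
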
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index fe50ecfe7052..cf983e234df5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -327,7 +327,7 @@ dbuf_verify(dmu_buf_impl_t *db)
if (db->db_parent == dn->dn_dbuf) {
/* db is pointed to by the dnode */
/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
- if (db->db.db_object == DMU_META_DNODE_OBJECT)
+ if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
ASSERT(db->db_parent == NULL);
else
ASSERT(db->db_parent != NULL);
@@ -899,15 +899,11 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* Shouldn't dirty a regular buffer in syncing context. Private
* objects may be dirtied in syncing context, but only if they
* were already pre-dirtied in open context.
- * XXX We may want to prohibit dirtying in syncing context even
- * if they did pre-dirty.
*/
ASSERT(!dmu_tx_is_syncing(tx) ||
BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
- dn->dn_object == DMU_META_DNODE_OBJECT ||
- dn->dn_objset->os_dsl_dataset == NULL ||
- dsl_dir_is_private(dn->dn_objset->os_dsl_dataset->ds_dir));
-
+ DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
+ dn->dn_objset->os_dsl_dataset == NULL);
/*
* We make this assert for private objects as well, but after we
* check if we're already dirty. They are allowed to re-dirty
@@ -965,7 +961,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
/*
* Only valid if not already dirty.
*/
- ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
+ ASSERT(dn->dn_object == 0 ||
+ dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
ASSERT3U(dn->dn_nlevels, >, db->db_level);
@@ -977,15 +974,13 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
/*
* We should only be dirtying in syncing context if it's the
- * mos, a spa os, or we're initializing the os. However, we are
- * allowed to dirty in syncing context provided we already
- * dirtied it in open context. Hence we must make this
- * assertion only if we're not already dirty.
+ * mos or we're initializing the os or it's a special object.
+ * However, we are allowed to dirty in syncing context provided
+ * we already dirtied it in open context. Hence we must make
+ * this assertion only if we're not already dirty.
*/
- ASSERT(!dmu_tx_is_syncing(tx) ||
- os->os_dsl_dataset == NULL ||
- !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
- !BP_IS_HOLE(os->os_rootbp));
+ ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
+ os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
ASSERT(db->db.db_size != 0);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
@@ -1285,6 +1280,68 @@ dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
/*
+ * Directly assign a provided arc buf to a given dbuf if it's not referenced
+ * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
+ */
+void
+dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
+{
+ ASSERT(!refcount_is_zero(&db->db_holds));
+ ASSERT(db->db_dnode->dn_object != DMU_META_DNODE_OBJECT);
+ ASSERT(db->db_blkid != DB_BONUS_BLKID);
+ ASSERT(db->db_level == 0);
+ ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA);
+ ASSERT(buf != NULL);
+ ASSERT(arc_buf_size(buf) == db->db.db_size);
+ ASSERT(tx->tx_txg != 0);
+
+ arc_return_buf(buf, db);
+ ASSERT(arc_released(buf));
+
+ mutex_enter(&db->db_mtx);
+
+ while (db->db_state == DB_READ || db->db_state == DB_FILL)
+ cv_wait(&db->db_changed, &db->db_mtx);
+
+ ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED);
+
+ if (db->db_state == DB_CACHED &&
+ refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+ mutex_exit(&db->db_mtx);
+ (void) dbuf_dirty(db, tx);
+ bcopy(buf->b_data, db->db.db_data, db->db.db_size);
+ VERIFY(arc_buf_remove_ref(buf, db) == 1);
+ return;
+ }
+
+ if (db->db_state == DB_CACHED) {
+ dbuf_dirty_record_t *dr = db->db_last_dirty;
+
+ ASSERT(db->db_buf != NULL);
+ if (dr != NULL && dr->dr_txg == tx->tx_txg) {
+ ASSERT(dr->dt.dl.dr_data == db->db_buf);
+ if (!arc_released(db->db_buf)) {
+ ASSERT(dr->dt.dl.dr_override_state ==
+ DR_OVERRIDDEN);
+ arc_release(db->db_buf, db);
+ }
+ dr->dt.dl.dr_data = buf;
+ VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+ } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
+ arc_release(db->db_buf, db);
+ VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+ }
+ db->db_buf = NULL;
+ }
+ ASSERT(db->db_buf == NULL);
+ dbuf_set_data(db, buf);
+ db->db_state = DB_FILL;
+ mutex_exit(&db->db_mtx);
+ (void) dbuf_dirty(db, tx);
+ dbuf_fill_done(db, tx);
+}
+
+/*
* "Clear" the contents of this dbuf. This will mark the dbuf
* EVICTING and clear *most* of its references. Unfortunately,
* when we are not holding the dn_dbufs_mtx, we can't clear the
@@ -1827,6 +1884,19 @@ dmu_buf_get_user(dmu_buf_t *db_fake)
return (db->db_user_ptr);
}
+boolean_t
+dmu_buf_freeable(dmu_buf_t *dbuf)
+{
+ boolean_t res = B_FALSE;
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
+
+ if (db->db_blkptr)
+ res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
+ db->db_blkptr->blk_birth);
+
+ return (res);
+}
+
static void
dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
{
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index 115278125109..133343b8936c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -82,6 +82,8 @@ const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ byteswap_uint64_array, TRUE, "FUID table size" },
{ zap_byteswap, TRUE, "DSL dataset next clones"},
{ zap_byteswap, TRUE, "scrub work queue" },
+ { zap_byteswap, TRUE, "ZFS user/group used" },
+ { zap_byteswap, TRUE, "ZFS user/group quota" },
};
int
@@ -177,22 +179,22 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
* whose dnodes are in the same block.
*/
static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
+ int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dsl_pool_t *dp = NULL;
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
- uint32_t flags;
+ uint32_t dbuf_flags;
int err;
zio_t *zio;
hrtime_t start;
ASSERT(length <= DMU_MAX_ACCESS);
- flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
- if (length > zfetch_array_rd_sz)
- flags |= DB_RF_NOPREFETCH;
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
+ if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
+ dbuf_flags |= DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
@@ -230,7 +232,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
/* initiate async i/o */
if (read) {
rw_exit(&dn->dn_struct_rwlock);
- (void) dbuf_read(db, zio, flags);
+ (void) dbuf_read(db, zio, dbuf_flags);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
}
dbp[i] = &db->db;
@@ -282,7 +284,7 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
return (err);
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp);
+ numbufsp, dbpp, DMU_READ_PREFETCH);
dnode_rele(dn, FTAG);
@@ -297,7 +299,7 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
int err;
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp);
+ numbufsp, dbpp, DMU_READ_PREFETCH);
return (err);
}
@@ -434,7 +436,8 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
object_size = align == 1 ? dn->dn_datablksz :
(dn->dn_maxblkid + 1) << dn->dn_datablkshift;
- if (trunc || (end = offset + length) > object_size)
+ end = offset + length;
+ if (trunc || end > object_size)
end = object_size;
if (end <= offset)
return (0);
@@ -442,6 +445,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
while (length) {
start = end;
+ /* assert(offset <= start) */
err = get_next_chunk(dn, &start, offset);
if (err)
return (err);
@@ -532,7 +536,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- void *buf)
+ void *buf, uint32_t flags)
{
dnode_t *dn;
dmu_buf_t **dbp;
@@ -562,7 +566,7 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
* to be reading in parallel.
*/
err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
- TRUE, FTAG, &numbufs, &dbp);
+ TRUE, FTAG, &numbufs, &dbp, flags);
if (err)
break;
@@ -771,9 +775,6 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
- if (err)
- break;
-
offset += tocpy;
size -= tocpy;
}
@@ -783,6 +784,58 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
#endif /* !__FreeBSD__ */
#endif /* _KERNEL */
+/*
+ * Allocate a loaned anonymous arc buffer.
+ */
+arc_buf_t *
+dmu_request_arcbuf(dmu_buf_t *handle, int size)
+{
+ dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode;
+
+ return (arc_loan_buf(dn->dn_objset->os_spa, size));
+}
+
+/*
+ * Free a loaned arc buffer.
+ */
+void
+dmu_return_arcbuf(arc_buf_t *buf)
+{
+ arc_return_buf(buf, FTAG);
+ VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
+}
+
+/*
+ * When possible, directly assign the passed loaned arc buffer to a dbuf.
+ * If this is not possible, copy the contents of the passed arc buf via
+ * dmu_write().
+ */
+void
+dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+ dmu_tx_t *tx)
+{
+ dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode;
+ dmu_buf_impl_t *db;
+ uint32_t blksz = (uint32_t)arc_buf_size(buf);
+ uint64_t blkid;
+
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ blkid = dbuf_whichblock(dn, offset);
+ VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
+ rw_exit(&dn->dn_struct_rwlock);
+
+ if (offset == db->db.db_offset && blksz == db->db.db_size) {
+ dbuf_assign_arcbuf(db, buf, tx);
+ dbuf_rele(db, FTAG);
+ } else {
+ dbuf_rele(db, FTAG);
+ ASSERT(dn->dn_objset->os.os == dn->dn_objset);
+ dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz,
+ buf->b_data, tx);
+ dmu_return_arcbuf(buf);
+ }
+}
+
typedef struct {
dbuf_dirty_record_t *dr;
dmu_sync_cb_t *done;
@@ -794,14 +847,20 @@ static void
dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
{
blkptr_t *bp = zio->io_bp;
+ dmu_sync_arg_t *in = varg;
+ dbuf_dirty_record_t *dr = in->dr;
+ dmu_buf_impl_t *db = dr->dr_dbuf;
if (!BP_IS_HOLE(bp)) {
- dmu_sync_arg_t *in = varg;
- dbuf_dirty_record_t *dr = in->dr;
- dmu_buf_impl_t *db = dr->dr_dbuf;
ASSERT(BP_GET_TYPE(bp) == db->db_dnode->dn_type);
ASSERT(BP_GET_LEVEL(bp) == 0);
bp->blk_fill = 1;
+ } else {
+ /*
+ * dmu_sync() can compress a block of zeros to a null blkptr
+ * but the block size still needs to be passed through to replay
+ */
+ BP_SET_LSIZE(bp, db->db.db_size);
}
}
@@ -817,6 +876,8 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
mutex_enter(&db->db_mtx);
ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
dr->dt.dl.dr_overridden_by = *zio->io_bp; /* structure assignment */
+ if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by))
+ BP_ZERO(&dr->dt.dl.dr_overridden_by);
dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
cv_broadcast(&db->db_changed);
mutex_exit(&db->db_mtx);
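
Taken together, dmu_request_arcbuf(), dmu_assign_arcbuf() and dmu_return_arcbuf() give callers a copy-avoiding write path. The sketch below is illustrative only (the function name and parameters are mine); it assumes a held dbuf handle and an already-assigned transaction, and a block-aligned, full-block write, since anything else falls back to dmu_write() inside dmu_assign_arcbuf().

/*
 * Illustrative zero-copy block write using the new entry points.
 */
static void
write_one_block(dmu_buf_t *handle, uint64_t offset, const void *data,
    int blksz, dmu_tx_t *tx)
{
	arc_buf_t *abuf;

	abuf = dmu_request_arcbuf(handle, blksz);
	bcopy(data, abuf->b_data, blksz);

	/*
	 * Consumes abuf: either it is attached to the dbuf directly,
	 * or its contents are copied with dmu_write() and the loan is
	 * settled via dmu_return_arcbuf().
	 */
	dmu_assign_arcbuf(handle, offset, abuf, tx);
}
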
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
index c9e00d511516..2678b839fda7 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -164,10 +164,15 @@ dmu_objset_byteswap(void *buf, size_t size)
{
objset_phys_t *osp = buf;
- ASSERT(size == sizeof (objset_phys_t));
+ ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
dnode_byteswap(&osp->os_meta_dnode);
byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
osp->os_type = BSWAP_64(osp->os_type);
+ osp->os_flags = BSWAP_64(osp->os_flags);
+ if (size == sizeof (objset_phys_t)) {
+ dnode_byteswap(&osp->os_userused_dnode);
+ dnode_byteswap(&osp->os_groupused_dnode);
+ }
}
int
@@ -210,12 +215,30 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
err = EIO;
return (err);
}
+
+ /* Increase the blocksize if we are permitted. */
+ if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
+ arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
+ arc_buf_t *buf = arc_buf_alloc(spa,
+ sizeof (objset_phys_t), &osi->os_phys_buf,
+ ARC_BUFC_METADATA);
+ bzero(buf->b_data, sizeof (objset_phys_t));
+ bcopy(osi->os_phys_buf->b_data, buf->b_data,
+ arc_buf_size(osi->os_phys_buf));
+ (void) arc_buf_remove_ref(osi->os_phys_buf,
+ &osi->os_phys_buf);
+ osi->os_phys_buf = buf;
+ }
+
osi->os_phys = osi->os_phys_buf->b_data;
+ osi->os_flags = osi->os_phys->os_flags;
} else {
- osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
+ int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
+ sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
+ osi->os_phys_buf = arc_buf_alloc(spa, size,
&osi->os_phys_buf, ARC_BUFC_METADATA);
osi->os_phys = osi->os_phys_buf->b_data;
- bzero(osi->os_phys, sizeof (objset_phys_t));
+ bzero(osi->os_phys, size);
}
/*
@@ -276,6 +299,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
osi->os_meta_dnode = dnode_special_open(osi,
&osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
+ if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
+ osi->os_userused_dnode = dnode_special_open(osi,
+ &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
+ osi->os_groupused_dnode = dnode_special_open(osi,
+ &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
+ }
/*
* We should be the only thread trying to do this because we
@@ -456,13 +485,15 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg)
os.os = osi;
(void) dmu_objset_evict_dbufs(&os);
- ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode);
- ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode);
- ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL);
-
dnode_special_close(osi->os_meta_dnode);
+ if (osi->os_userused_dnode) {
+ dnode_special_close(osi->os_userused_dnode);
+ dnode_special_close(osi->os_groupused_dnode);
+ }
zil_free(osi->os_zil);
+ ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);
+
VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
mutex_destroy(&osi->os_lock);
mutex_destroy(&osi->os_obj_lock);
@@ -520,6 +551,10 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
ASSERT(type != DMU_OST_ANY);
ASSERT(type < DMU_OST_NUMTYPES);
osi->os_phys->os_type = type;
+ if (dmu_objset_userused_enabled(osi)) {
+ osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+ osi->os_flags = osi->os_phys->os_flags;
+ }
dsl_dataset_dirty(ds, tx);
@@ -704,13 +739,33 @@ struct snaparg {
char *snapname;
char failed[MAXPATHLEN];
boolean_t checkperms;
- list_t objsets;
+ nvlist_t *props;
};
-struct osnode {
- list_node_t node;
- objset_t *os;
-};
+static int
+snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ objset_t *os = arg1;
+ struct snaparg *sn = arg2;
+
+ /* The props have already been checked by zfs_check_userprops(). */
+
+ return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset,
+ sn->snapname, tx));
+}
+
+static void
+snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+ objset_t *os = arg1;
+ dsl_dataset_t *ds = os->os->os_dsl_dataset;
+ struct snaparg *sn = arg2;
+
+ dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);
+
+ if (sn->props)
+ dsl_props_set_sync(ds->ds_prev, sn->props, cr, tx);
+}
static int
dmu_objset_snapshot_one(char *name, void *arg)
@@ -747,13 +802,8 @@ dmu_objset_snapshot_one(char *name, void *arg)
*/
err = zil_suspend(dmu_objset_zil(os));
if (err == 0) {
- struct osnode *osn;
- dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check,
- dsl_dataset_snapshot_sync, os->os->os_dsl_dataset,
- sn->snapname, 3);
- osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP);
- osn->os = os;
- list_insert_tail(&sn->objsets, osn);
+ dsl_sync_task_create(sn->dstg, snapshot_check,
+ snapshot_sync, os, sn, 3);
} else {
dmu_objset_close(os);
}
@@ -762,11 +812,11 @@ dmu_objset_snapshot_one(char *name, void *arg)
}
int
-dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
+dmu_objset_snapshot(char *fsname, char *snapname,
+ nvlist_t *props, boolean_t recursive)
{
dsl_sync_task_t *dst;
- struct osnode *osn;
- struct snaparg sn = { 0 };
+ struct snaparg sn;
spa_t *spa;
int err;
@@ -778,8 +828,7 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
sn.snapname = snapname;
- list_create(&sn.objsets, sizeof (struct osnode),
- offsetof(struct osnode, node));
+ sn.props = props;
if (recursive) {
sn.checkperms = B_TRUE;
@@ -790,27 +839,19 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
err = dmu_objset_snapshot_one(fsname, &sn);
}
- if (err)
- goto out;
-
- err = dsl_sync_task_group_wait(sn.dstg);
+ if (err == 0)
+ err = dsl_sync_task_group_wait(sn.dstg);
for (dst = list_head(&sn.dstg->dstg_tasks); dst;
dst = list_next(&sn.dstg->dstg_tasks, dst)) {
- dsl_dataset_t *ds = dst->dst_arg1;
+ objset_t *os = dst->dst_arg1;
+ dsl_dataset_t *ds = os->os->os_dsl_dataset;
if (dst->dst_err)
dsl_dataset_name(ds, sn.failed);
+ zil_resume(dmu_objset_zil(os));
+ dmu_objset_close(os);
}
-out:
- while (osn = list_head(&sn.objsets)) {
- list_remove(&sn.objsets, osn);
- zil_resume(dmu_objset_zil(osn->os));
- dmu_objset_close(osn->os);
- kmem_free(osn, sizeof (struct osnode));
- }
- list_destroy(&sn.objsets);
-
if (err)
(void) strcpy(fsname, sn.failed);
dsl_sync_task_group_destroy(sn.dstg);
@@ -819,7 +860,7 @@ out:
}
static void
-dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
+dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
dnode_t *dn;
@@ -827,14 +868,20 @@ dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
ASSERT(dn->dn_dbuf->db_data_pending);
/*
- * Initialize dn_zio outside dnode_sync()
- * to accomodate meta-dnode
+ * Initialize dn_zio outside dnode_sync() because the
+ * meta-dnode needs to set it outside dnode_sync().
*/
dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
ASSERT(dn->dn_zio);
ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
list_remove(list, dn);
+
+ if (newlist) {
+ (void) dnode_add_ref(dn, newlist);
+ list_insert_tail(newlist, dn);
+ }
+
dnode_sync(dn, tx);
}
}
@@ -853,9 +900,12 @@ ready(zio_t *zio, arc_buf_t *abuf, void *arg)
ASSERT(BP_GET_LEVEL(bp) == 0);
/*
- * Update rootbp fill count.
+ * Update rootbp fill count: it should be the number of objects
+ * allocated in the object set (not counting the "special"
+ * objects that are stored in the objset_phys_t -- the meta
+ * dnode and user/group accounting objects).
*/
- bp->blk_fill = 1; /* count the meta-dnode */
+ bp->blk_fill = 0;
for (int i = 0; i < dnp->dn_nblkptr; i++)
bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
@@ -878,6 +928,7 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
writeprops_t wp = { 0 };
zio_t *zio;
list_t *list;
+ list_t *newlist = NULL;
dbuf_dirty_record_t *dr;
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
@@ -915,20 +966,41 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
}
arc_release(os->os_phys_buf, &os->os_phys_buf);
+
zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
/*
- * Sync meta-dnode - the parent IO for the sync is the root block
+ * Sync special dnodes - the parent IO for the sync is the root block
*/
os->os_meta_dnode->dn_zio = zio;
dnode_sync(os->os_meta_dnode, tx);
+ os->os_phys->os_flags = os->os_flags;
+
+ if (os->os_userused_dnode &&
+ os->os_userused_dnode->dn_type != DMU_OT_NONE) {
+ os->os_userused_dnode->dn_zio = zio;
+ dnode_sync(os->os_userused_dnode, tx);
+ os->os_groupused_dnode->dn_zio = zio;
+ dnode_sync(os->os_groupused_dnode, tx);
+ }
+
txgoff = tx->tx_txg & TXG_MASK;
- dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
- dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);
+ if (dmu_objset_userused_enabled(os)) {
+ newlist = &os->os_synced_dnodes;
+ /*
+ * We must create the list here because it uses the
+ * dn_dirty_link[] of this txg.
+ */
+ list_create(newlist, sizeof (dnode_t),
+ offsetof(dnode_t, dn_dirty_link[txgoff]));
+ }
+
+ dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
+ dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
list = &os->os_meta_dnode->dn_dirty_records[txgoff];
while (dr = list_head(list)) {
@@ -945,6 +1017,146 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
zio_nowait(zio);
}
+static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+
+void
+dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+{
+ used_cbs[ost] = cb;
+}
+
+boolean_t
+dmu_objset_userused_enabled(objset_impl_t *os)
+{
+ return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
+ used_cbs[os->os_phys->os_type] &&
+ os->os_userused_dnode);
+}
+
+void
+dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ list_t *list = &os->os_synced_dnodes;
+ static const char zerobuf[DN_MAX_BONUSLEN] = {0};
+
+ ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+
+ while (dn = list_head(list)) {
+ dmu_object_type_t bonustype;
+
+ ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
+ ASSERT(dn->dn_oldphys);
+ ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
+ dn->dn_phys->dn_flags &
+ DNODE_FLAG_USERUSED_ACCOUNTED);
+
+ /* Allocate the user/groupused objects if necessary. */
+ if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
+ VERIFY(0 == zap_create_claim(&os->os,
+ DMU_USERUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ VERIFY(0 == zap_create_claim(&os->os,
+ DMU_GROUPUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ }
+
+ /*
+ * If the object was not previously
+ * accounted, pretend that it was free.
+ */
+ if (!(dn->dn_oldphys->dn_flags &
+ DNODE_FLAG_USERUSED_ACCOUNTED)) {
+ bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
+ }
+
+ /*
+ * If the object was freed, use the previous bonustype.
+ */
+ bonustype = dn->dn_phys->dn_bonustype ?
+ dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
+ ASSERT(dn->dn_phys->dn_type != 0 ||
+ (bcmp(DN_BONUS(dn->dn_phys), zerobuf,
+ DN_MAX_BONUSLEN) == 0 &&
+ DN_USED_BYTES(dn->dn_phys) == 0));
+ ASSERT(dn->dn_oldphys->dn_type != 0 ||
+ (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
+ DN_MAX_BONUSLEN) == 0 &&
+ DN_USED_BYTES(dn->dn_oldphys) == 0));
+ used_cbs[os->os_phys->os_type](&os->os, bonustype,
+ DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
+ DN_USED_BYTES(dn->dn_oldphys),
+ DN_USED_BYTES(dn->dn_phys), tx);
+
+ /*
+ * The mutex is needed here for interlock with dnode_allocate.
+ */
+ mutex_enter(&dn->dn_mtx);
+ zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t));
+ dn->dn_oldphys = NULL;
+ mutex_exit(&dn->dn_mtx);
+
+ list_remove(list, dn);
+ dnode_rele(dn, list);
+ }
+}
+
+boolean_t
+dmu_objset_userspace_present(objset_t *os)
+{
+ return (os->os->os_phys->os_flags &
+ OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+}
+
+int
+dmu_objset_userspace_upgrade(objset_t *os)
+{
+ uint64_t obj;
+ int err = 0;
+
+ if (dmu_objset_userspace_present(os))
+ return (0);
+ if (!dmu_objset_userused_enabled(os->os))
+ return (ENOTSUP);
+ if (dmu_objset_is_snapshot(os))
+ return (EINVAL);
+
+ /*
+ * We simply need to mark every object dirty, so that it will be
+ * synced out and now accounted. If this is called
+ * concurrently, or if we already did some work before crashing,
+ * that's fine, since we track each object's accounted state
+ * independently.
+ */
+
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ int objerr;
+
+ if (issig(JUSTLOOKING) && issig(FORREAL))
+ return (EINTR);
+
+ objerr = dmu_bonus_hold(os, obj, FTAG, &db);
+ if (objerr)
+ continue;
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, obj);
+ objerr = dmu_tx_assign(tx, TXG_WAIT);
+ if (objerr) {
+ dmu_tx_abort(tx);
+ continue;
+ }
+ dmu_buf_will_dirty(db, tx);
+ dmu_buf_rele(db, FTAG);
+ dmu_tx_commit(tx);
+ }
+
+ os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ return (0);
+}
+
void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
@@ -978,6 +1190,8 @@ dmu_objset_stats(objset_t *os, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
os->os->os_phys->os_type);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
+ dmu_objset_userspace_present(os));
}
int
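
For orientation, here is a sketch of a dmu_objset_register_type() consumer. The parameter list simply mirrors the used_cbs[] call site in dmu_objset_do_userquota_callbacks() above; the authoritative typedef (objset_used_cb_t) lives in dmu.h and the real ZPL callback is registered from the ZFS vfs code, neither of which appears in this hunk, so treat the names below as assumptions.

/*
 * Hypothetical per-objset-type space accounting callback.
 */
static void
example_used_cb(objset_t *os, dmu_object_type_t bonustype,
    void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
    dmu_tx_t *tx)
{
	/*
	 * Interpret the old/new bonus buffers (e.g. znode_phys_t for
	 * DMU_OST_ZFS) and charge the used-space delta to the owning
	 * user and group in the DMU_USERUSED/DMU_GROUPUSED objects.
	 */
}

static void
example_register_used_cb(void)
{
	dmu_objset_register_type(DMU_OST_ZFS, example_used_cb);
}
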
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
index 6effae839bbb..ed5afb4e1df5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
@@ -180,7 +180,9 @@ backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
if (issig(JUSTLOOKING) && issig(FORREAL))
return (EINTR);
- if (bp == NULL && zb->zb_object == 0) {
+ if (zb->zb_object != 0 && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
+ return (0);
+ } else if (bp == NULL && zb->zb_object == 0) {
uint64_t span = BP_SPAN(dnp, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index ef0284d616ea..89cbfad29f84 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -64,6 +64,9 @@ struct traverse_data {
void *td_arg;
};
+static int traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
+ arc_buf_t *buf, uint64_t objset, uint64_t object);
+
/* ARGSUSED */
static void
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
@@ -119,7 +122,7 @@ traverse_zil(struct traverse_data *td, zil_header_t *zh)
* We only want to visit blocks that have been claimed but not yet
* replayed (or, in read-only mode, blocks that *would* be claimed).
*/
- if (claim_txg == 0 && (spa_mode & FWRITE))
+ if (claim_txg == 0 && spa_writeable(td->td_spa))
return;
zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
@@ -189,7 +192,7 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
}
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_WAIT;
- int i, j;
+ int i;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
err = arc_read(NULL, td->td_spa, bp, pbuf,
@@ -201,20 +204,15 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
/* recursively visitbp() blocks below this */
dnp = buf->b_data;
for (i = 0; i < epb && err == 0; i++, dnp++) {
- for (j = 0; j < dnp->dn_nblkptr; j++) {
- SET_BOOKMARK(&czb, zb->zb_objset,
- zb->zb_blkid * epb + i,
- dnp->dn_nlevels - 1, j);
- err = traverse_visitbp(td, dnp, buf,
- (blkptr_t *)&dnp->dn_blkptr[j], &czb);
- if (err)
- break;
- }
+ err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+ zb->zb_blkid * epb + i);
+ if (err)
+ break;
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
- int j;
+ dnode_phys_t *dnp;
err = arc_read_nolock(NULL, td->td_spa, bp,
arc_getbuf_func, &buf,
@@ -225,14 +223,17 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
osp = buf->b_data;
traverse_zil(td, &osp->os_zil_header);
- for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
- SET_BOOKMARK(&czb, zb->zb_objset, 0,
- osp->os_meta_dnode.dn_nlevels - 1, j);
- err = traverse_visitbp(td, &osp->os_meta_dnode, buf,
- (blkptr_t *)&osp->os_meta_dnode.dn_blkptr[j],
- &czb);
- if (err)
- break;
+ dnp = &osp->os_meta_dnode;
+ err = traverse_dnode(td, dnp, buf, zb->zb_objset, 0);
+ if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+ dnp = &osp->os_userused_dnode;
+ err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+ DMU_USERUSED_OBJECT);
+ }
+ if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+ dnp = &osp->os_groupused_dnode;
+ err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+ DMU_GROUPUSED_OBJECT);
}
}
@@ -245,6 +246,23 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
return (err);
}
+static int
+traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
+ arc_buf_t *buf, uint64_t objset, uint64_t object)
+{
+ int j, err = 0;
+ zbookmark_t czb;
+
+ for (j = 0; j < dnp->dn_nblkptr; j++) {
+ SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+ err = traverse_visitbp(td, dnp, buf,
+ (blkptr_t *)&dnp->dn_blkptr[j], &czb);
+ if (err)
+ break;
+ }
+ return (err);
+}
+
/* ARGSUSED */
static int
traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
index bfa5699d74e3..b6a5cdbb89cd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -160,6 +160,41 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
return (err);
}
+static void
+dmu_tx_count_indirects(dmu_tx_hold_t *txh, dmu_buf_impl_t *db,
+ boolean_t freeable, dmu_buf_impl_t **history)
+{
+ int i = db->db_level + 1;
+ dnode_t *dn = db->db_dnode;
+
+ if (i >= dn->dn_nlevels)
+ return;
+
+ db = db->db_parent;
+ if (db == NULL) {
+ uint64_t lvls = dn->dn_nlevels - i;
+
+ txh->txh_space_towrite += lvls << dn->dn_indblkshift;
+ return;
+ }
+
+ if (db != history[i]) {
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ uint64_t space = 1ULL << dn->dn_indblkshift;
+
+ freeable = (db->db_blkptr && (freeable ||
+ dsl_dataset_block_freeable(ds, db->db_blkptr->blk_birth)));
+ if (freeable)
+ txh->txh_space_tooverwrite += space;
+ else
+ txh->txh_space_towrite += space;
+ if (db->db_blkptr)
+ txh->txh_space_tounref += space;
+ history[i] = db;
+ dmu_tx_count_indirects(txh, db, freeable, history);
+ }
+}
+
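+
+/*
+ * Worked example for the accounting above (illustrative values that
+ * match the common defaults): with dn_indblkshift = 14 (16K indirect
+ * blocks) and SPA_BLKPTRSHIFT = 7 (128-byte blkptrs), each indirect
+ * block maps 1 << (14 - 7) = 128 children.  Starting from a level-0
+ * dbuf on a dnode with dn_nlevels = 3, the walk visits at most the
+ * level-1 and level-2 parents, charging 1 << 14 = 16K per level (to
+ * tooverwrite when the existing copy is freeable, otherwise to
+ * towrite), and it stops as soon as it meets a parent already
+ * recorded in history[], so shared indirects are counted only once
+ * per hold.
+ */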
/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
@@ -177,17 +212,26 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
min_ibs = DN_MIN_INDBLKSHIFT;
max_ibs = DN_MAX_INDBLKSHIFT;
+ if (dn) {
+ dmu_buf_impl_t *last[DN_MAX_LEVELS];
+ int nlvls = dn->dn_nlevels;
+ int delta;
- /*
- * For i/o error checking, read the first and last level-0
- * blocks (if they are not aligned), and all the level-1 blocks.
- */
+ /*
+ * For i/o error checking, read the first and last level-0
+ * blocks (if they are not aligned), and all the level-1 blocks.
+ */
- if (dn) {
if (dn->dn_maxblkid == 0) {
- err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
- if (err)
- goto out;
+ delta = dn->dn_datablksz;
+ start = (off < dn->dn_datablksz) ? 0 : 1;
+ end = (off+len <= dn->dn_datablksz) ? 0 : 1;
+ if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
+ err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
+ if (err)
+ goto out;
+ delta -= off;
+ }
} else {
zio_t *zio = zio_root(dn->dn_objset->os_spa,
NULL, NULL, ZIO_FLAG_CANFAIL);
@@ -211,10 +255,9 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
}
/* level-1 blocks */
- if (dn->dn_nlevels > 1) {
- start >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (i = start+1; i < end; i++) {
+ if (nlvls > 1) {
+ int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+ for (i = (start>>shft)+1; i < end>>shft; i++) {
err = dmu_tx_check_ioerr(zio, dn, 1, i);
if (err)
goto out;
@@ -224,20 +267,70 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
err = zio_wait(zio);
if (err)
goto out;
+ delta = P2NPHASE(off, dn->dn_datablksz);
}
- }
- /*
- * If there's more than one block, the blocksize can't change,
- * so we can make a more precise estimate. Alternatively,
- * if the dnode's ibs is larger than max_ibs, always use that.
- * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
- * the code will still work correctly on existing pools.
- */
- if (dn && (dn->dn_maxblkid != 0 || dn->dn_indblkshift > max_ibs)) {
- min_ibs = max_ibs = dn->dn_indblkshift;
- if (dn->dn_datablkshift != 0)
+ if (dn->dn_maxblkid > 0) {
+ /*
+ * The blocksize can't change,
+ * so we can make a more precise estimate.
+ */
+ ASSERT(dn->dn_datablkshift != 0);
min_bs = max_bs = dn->dn_datablkshift;
+ min_ibs = max_ibs = dn->dn_indblkshift;
+ } else if (dn->dn_indblkshift > max_ibs) {
+ /*
+ * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
+ * the code will still work correctly on older pools.
+ */
+ min_ibs = max_ibs = dn->dn_indblkshift;
+ }
+
+ /*
+ * If this write is not off the end of the file
+ * we need to account for overwrites/unref.
+ */
+ if (start <= dn->dn_maxblkid)
+ bzero(last, sizeof (dmu_buf_impl_t *) * DN_MAX_LEVELS);
+ while (start <= dn->dn_maxblkid) {
+ spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ dmu_buf_impl_t *db;
+
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ db = dbuf_hold_level(dn, 0, start, FTAG);
+ rw_exit(&dn->dn_struct_rwlock);
+ if (db->db_blkptr && dsl_dataset_block_freeable(ds,
+ db->db_blkptr->blk_birth)) {
+ dprintf_bp(db->db_blkptr, "can free old%s", "");
+ txh->txh_space_tooverwrite += dn->dn_datablksz;
+ txh->txh_space_tounref += dn->dn_datablksz;
+ dmu_tx_count_indirects(txh, db, TRUE, last);
+ } else {
+ txh->txh_space_towrite += dn->dn_datablksz;
+ if (db->db_blkptr)
+ txh->txh_space_tounref +=
+ bp_get_dasize(spa, db->db_blkptr);
+ dmu_tx_count_indirects(txh, db, FALSE, last);
+ }
+ dbuf_rele(db, FTAG);
+ if (++start > end) {
+ /*
+ * Account for new indirects appearing
+ * before this IO gets assigned into a txg.
+ */
+ bits = 64 - min_bs;
+ epbs = min_ibs - SPA_BLKPTRSHIFT;
+ for (bits -= epbs * (nlvls - 1);
+ bits >= 0; bits -= epbs)
+ txh->txh_fudge += 1ULL << max_ibs;
+ goto out;
+ }
+ off += delta;
+ if (len >= delta)
+ len -= delta;
+ delta = dn->dn_datablksz;
+ }
}
/*
@@ -260,20 +353,22 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
start >>= epbs;
end >>= epbs;
- /*
- * If we increase the number of levels of indirection,
- * we'll need new blkid=0 indirect blocks. If start == 0,
- * we're already accounting for that blocks; and if end == 0,
- * we can't increase the number of levels beyond that.
- */
- if (start != 0 && end != 0)
- txh->txh_space_towrite += 1ULL << max_ibs;
+ ASSERT3U(end, >=, start);
txh->txh_space_towrite += (end - start + 1) << max_ibs;
+ if (start != 0) {
+ /*
+ * We also need a new blkid=0 indirect block
+ * to reference any existing file data.
+ */
+ txh->txh_space_towrite += 1ULL << max_ibs;
+ }
}
- ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);
-
out:
+ if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
+ 2 * DMU_MAX_ACCESS)
+ err = EFBIG;
+
if (err)
txh->txh_tx->tx_err = err;
}
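In the rewritten dmu_tx_count_write(), a block that already exists on disk and was born after the most recent snapshot can be freed and rewritten in place, so it counts against tooverwrite (and tounref); a block pinned by a snapshot, or not yet written, counts as a fresh towrite allocation. The old ASSERT on the total also becomes a graceful EFBIG. A rough sketch of that classification, assuming a simplified estimate structure and an illustrative cap value:

#include <errno.h>
#include <stdio.h>

#define DMU_MAX_ACCESS (10ULL << 20)   /* illustrative cap, not the real value */

struct est {
	unsigned long long towrite;
	unsigned long long tooverwrite;
	unsigned long long tounref;
};

/* blk_birth == 0 means "no block on disk yet". */
static void
count_block(struct est *e, unsigned long long blk_birth,
    unsigned long long prev_snap_txg, unsigned long long blksz)
{
	if (blk_birth != 0 && blk_birth > prev_snap_txg) {
		/* only the live copy references it: free + rewrite in place */
		e->tooverwrite += blksz;
		e->tounref += blksz;
	} else {
		/* held by a snapshot (or brand new): full new allocation */
		e->towrite += blksz;
		if (blk_birth != 0)
			e->tounref += blksz;
	}
}

static int
check_cap(const struct est *e)
{
	if (e->towrite + e->tooverwrite > 2 * DMU_MAX_ACCESS)
		return (EFBIG);    /* was an ASSERT before this change */
	return (0);
}

int
main(void)
{
	struct est e = { 0, 0, 0 };

	count_block(&e, 120, 100, 131072);  /* born after the last snapshot */
	count_block(&e, 80, 100, 131072);   /* pinned by the snapshot */
	printf("towrite=%llu tooverwrite=%llu err=%d\n",
	    e.towrite, e.tooverwrite, check_cap(&e));
	return (0);
}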
@@ -290,6 +385,7 @@ dmu_tx_count_dnode(dmu_tx_hold_t *txh)
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
dn->dn_dbuf->db_blkptr->blk_birth)) {
txh->txh_space_tooverwrite += space;
+ txh->txh_space_tounref += space;
} else {
txh->txh_space_towrite += space;
if (dn && dn->dn_dbuf->db_blkptr)
@@ -533,7 +629,7 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
}
void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
+dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
{
dmu_tx_hold_t *txh;
dnode_t *dn;
@@ -601,12 +697,8 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
}
}
- /*
- * 3 blocks overwritten: target leaf, ptrtbl block, header block
- * 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
- */
- dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
- (3 + (add ? 3 : 0)) << dn->dn_datablkshift);
+ err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
+ &txh->txh_space_towrite, &txh->txh_space_tooverwrite);
/*
* If the modified blocks are scattered to the four winds,
@@ -614,7 +706,10 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
*/
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
- txh->txh_space_towrite += 3 << dn->dn_indblkshift;
+ if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
+ txh->txh_space_towrite += 3 << dn->dn_indblkshift;
+ else
+ txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
}
void
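dmu_tx_hold_zap() now asks zap_count_write() how much the named entry would actually touch instead of assuming a flat 3 (+3 if adding) blocks, and the scattered-indirect fudge goes to tooverwrite when the dataset has no previous snapshot, since nothing can be pinning those blocks. The indirect fan-out loop, extracted as a standalone sketch (epbs here plays the role of dn_indblkshift - SPA_BLKPTRSHIFT):

#include <stdio.h>

/*
 * Charge ~3 indirect blocks per indirection level for a scattered ZAP
 * update; whether they count as new writes or in-place overwrites
 * depends on whether a previous snapshot could still be holding them.
 */
static void
count_zap_indirects(unsigned long long maxblkid, int epbs, int indblkshift,
    int has_prev_snap, unsigned long long *towrite,
    unsigned long long *tooverwrite)
{
	unsigned long long nblocks;

	for (nblocks = maxblkid >> epbs; nblocks != 0; nblocks >>= epbs) {
		if (has_prev_snap)
			*towrite += 3ULL << indblkshift;
		else
			*tooverwrite += 3ULL << indblkshift;
	}
}

int
main(void)
{
	unsigned long long w = 0, o = 0;

	count_zap_indirects(1 << 20, 7, 14, 0, &w, &o);
	printf("towrite=%llu tooverwrite=%llu\n", w, o);
	return (0);
}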
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
index f0b4080c074a..f9661d62d93e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
@@ -156,7 +156,7 @@ dnode_verify(dnode_t *dn)
}
if (dn->dn_phys->dn_type != DMU_OT_NONE)
ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
- ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || dn->dn_dbuf != NULL);
+ ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL);
if (dn->dn_dbuf != NULL) {
ASSERT3P(dn->dn_phys, ==,
(dnode_phys_t *)dn->dn_dbuf->db.db_data +
@@ -320,6 +320,7 @@ dnode_destroy(dnode_t *dn)
}
ASSERT(NULL == list_head(&dn->dn_dbufs));
#endif
+ ASSERT(dn->dn_oldphys == NULL);
mutex_enter(&os->os_lock);
list_remove(&os->os_dnodes, dn);
@@ -550,6 +551,22 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
*/
ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0);
+ if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
+ dn = (object == DMU_USERUSED_OBJECT) ?
+ os->os_userused_dnode : os->os_groupused_dnode;
+ if (dn == NULL)
+ return (ENOENT);
+ type = dn->dn_type;
+ if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
+ return (ENOENT);
+ if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
+ return (EEXIST);
+ DNODE_VERIFY(dn);
+ (void) refcount_add(&dn->dn_holds, tag);
+ *dnp = dn;
+ return (0);
+ }
+
if (object == 0 || object >= DN_MAX_OBJECT)
return (EINVAL);
@@ -608,7 +625,8 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
type = dn->dn_type;
if (dn->dn_free_txg ||
((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
- ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)) {
+ ((flag & DNODE_MUST_BE_FREE) &&
+ (type != DMU_OT_NONE || dn->dn_oldphys))) {
mutex_exit(&dn->dn_mtx);
dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
@@ -673,8 +691,10 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
objset_impl_t *os = dn->dn_objset;
uint64_t txg = tx->tx_txg;
- if (dn->dn_object == DMU_META_DNODE_OBJECT)
+ if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+ dsl_dataset_dirty(os->os_dsl_dataset, tx);
return;
+ }
DNODE_VERIFY(dn);
@@ -1270,7 +1290,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
dprintf("probing object %llu offset %llx level %d of %u\n",
dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
- hole = flags & DNODE_FIND_HOLE;
+ hole = ((flags & DNODE_FIND_HOLE) != 0);
inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
ASSERT(txg == 0 || !hole);
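DMU_USERUSED_OBJECT and DMU_GROUPUSED_OBJECT are pseudo-object numbers that live outside the ordinary object range, so dnode_hold_impl() short-circuits them to the dnodes cached on the objset rather than going through the meta-dnode; DMU_OBJECT_IS_SPECIAL() generalizes the old meta-dnode-only test. A toy routing sketch (the reserved IDs below are illustrative values, not the real encodings):

#include <errno.h>
#include <stdio.h>

/* reserved pseudo-object IDs (illustrative values only) */
#define OBJ_META      0ULL
#define OBJ_USERUSED  (~0ULL - 1)
#define OBJ_GROUPUSED (~0ULL - 2)

#define OBJ_IS_SPECIAL(o) \
	((o) == OBJ_META || (o) == OBJ_USERUSED || (o) == OBJ_GROUPUSED)

struct objset {
	void *userused_dn;      /* may be NULL on pools without the feature */
	void *groupused_dn;
};

static int
hold_object(struct objset *os, unsigned long long obj, void **dnp)
{
	if (obj == OBJ_USERUSED || obj == OBJ_GROUPUSED) {
		void *dn = (obj == OBJ_USERUSED) ?
		    os->userused_dn : os->groupused_dn;
		if (dn == NULL)
			return (ENOENT);   /* accounting not (yet) enabled */
		*dnp = dn;
		return (0);
	}
	/* ... normal path: look the object up through the meta-dnode ... */
	return (ENOENT);
}

int
main(void)
{
	struct objset os = { NULL, NULL };
	void *dn;

	printf("hold userused -> %d (ENOENT=%d)\n",
	    hold_object(&os, OBJ_USERUSED, &dn), ENOENT);
	return (0);
}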
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
index 1b729e391a8f..3bf0c81d0992 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
@@ -506,9 +506,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
/*
* Write out the dnode's dirty buffers.
- *
- * NOTE: The dnode is kept in memory by being dirty. Once the
- * dirty bit is cleared, it may be evicted. Beware of this!
*/
void
dnode_sync(dnode_t *dn, dmu_tx_t *tx)
@@ -517,20 +514,33 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dnode_phys_t *dnp = dn->dn_phys;
int txgoff = tx->tx_txg & TXG_MASK;
list_t *list = &dn->dn_dirty_records[txgoff];
+ static const dnode_phys_t zerodn = { 0 };
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
+ ASSERT(dnp->dn_type != DMU_OT_NONE ||
+ bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
DNODE_VERIFY(dn);
ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
+ if (dmu_objset_userused_enabled(dn->dn_objset) &&
+ !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+ ASSERT(dn->dn_oldphys == NULL);
+ dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t));
+ *dn->dn_oldphys = *dn->dn_phys; /* struct assignment */
+ dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
+ } else {
+ /* Once we account for it, we should always account for it. */
+ ASSERT(!(dn->dn_phys->dn_flags &
+ DNODE_FLAG_USERUSED_ACCOUNTED));
+ }
+
mutex_enter(&dn->dn_mtx);
if (dn->dn_allocated_txg == tx->tx_txg) {
/* The dnode is newly allocated or reallocated */
if (dnp->dn_type == DMU_OT_NONE) {
/* this is a first alloc, not a realloc */
- /* XXX shouldn't the phys already be zeroed? */
- bzero(dnp, DNODE_CORE_SIZE);
dnp->dn_nlevels = 1;
dnp->dn_nblkptr = dn->dn_nblkptr;
}
@@ -628,7 +638,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dbuf_sync_list(list, tx);
- if (dn->dn_object != DMU_META_DNODE_OBJECT) {
+ if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
ASSERT3P(list_head(list), ==, NULL);
dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
}
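When user/group accounting is enabled, dnode_sync() saves a copy of the on-disk dnode in dn_oldphys before the new contents are written, so the userquota callbacks can later charge each owner with new-used minus old-used; the special accounting objects themselves are excluded. A minimal save-before-mutate sketch of that delta accounting, with hypothetical field names:

#include <stdio.h>

struct phys {
	unsigned long long owner;
	unsigned long long used;
};

struct dnode {
	struct phys phys;       /* what will be written this txg */
	struct phys oldphys;    /* copy of what was on disk before */
	int have_old;
};

/* sync step: remember the old contents before dirtying them */
static void
sync_begin(struct dnode *dn)
{
	dn->oldphys = dn->phys;         /* struct assignment, like dn_oldphys */
	dn->have_old = 1;
}

/* later callback: charge the delta to the owner's space accounting */
static long long
used_delta(const struct dnode *dn)
{
	return (dn->have_old ?
	    (long long)dn->phys.used - (long long)dn->oldphys.used :
	    (long long)dn->phys.used);
}

int
main(void)
{
	struct dnode dn = { { 1001, 4096 }, { 0, 0 }, 0 };

	sync_begin(&dn);
	dn.phys.used = 12288;           /* the object grew during this txg */
	printf("delta for uid %llu = %lld bytes\n",
	    dn.phys.owner, used_delta(&dn));
	return (0);
}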
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
index 622fa5d2db87..ac9d67f671f6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -229,7 +229,7 @@ dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}
-int
+boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
return (blk_birth > dsl_dataset_prev_snap_txg(ds));
@@ -525,7 +525,15 @@ dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
rw_enter(&dp->dp_config_rwlock, RW_READER);
return (ENOENT);
}
+ /*
+ * The dp_config_rwlock lives above the ds_lock. And
+ * we need to check DSL_DATASET_IS_DESTROYED() while
+ * holding the ds_lock, so we have to drop and reacquire
+ * the ds_lock here.
+ */
+ mutex_exit(&ds->ds_lock);
rw_enter(&dp->dp_config_rwlock, RW_READER);
+ mutex_enter(&ds->ds_lock);
}
mutex_exit(&ds->ds_lock);
return (0);
@@ -981,6 +989,27 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
(void) dmu_free_object(os, obj);
}
+ /*
+ * We need to sync out all in-flight IO before we try to evict
+ * (the dataset evict func is trying to clear the cached entries
+ * for this dataset in the ARC).
+ */
+ txg_wait_synced(dd->dd_pool, 0);
+
+ /*
+ * If we managed to free all the objects in open
+ * context, the user space accounting should be zero.
+ */
+ if (ds->ds_phys->ds_bp.blk_fill == 0 &&
+ dmu_objset_userused_enabled(os->os)) {
+ uint64_t count;
+
+ ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
+ count == 0);
+ ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
+ count == 0);
+ }
+
dmu_objset_close(os);
if (err != ESRCH)
goto out;
@@ -1065,7 +1094,6 @@ dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
return (ds->ds_user_ptr);
}
-
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
@@ -1445,6 +1473,33 @@ dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
cv_destroy(&arg.cv);
}
+static void
+remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
+{
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ uint64_t count;
+ int err;
+
+ ASSERT(ds->ds_phys->ds_num_children >= 2);
+ err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
+ /*
+ * The err should not be ENOENT, but a bug in a previous version
+ * of the code could cause upgrade_clones_cb() to not set
+ * ds_next_snap_obj when it should, leading to a missing entry.
+ * If we knew that the pool was created after
+ * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
+ * ENOENT. However, at least we can check that we don't have
+ * too many entries in the next_clones_obj even after failing to
+ * remove this one.
+ */
+ if (err != ENOENT) {
+ VERIFY3U(err, ==, 0);
+ }
+ ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
+ &count));
+ ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
+}
+
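remove_from_next_clones() tolerates ENOENT from zap_remove_int() because the old upgrade_clones_cb() bug may have left the entry missing, but it still asserts that the next-clones table never holds more entries than the dataset's child count allows. A toy version of that tolerate-but-verify pattern, using a plain in-memory set in place of the ZAP:

#include <assert.h>
#include <errno.h>
#include <stdio.h>

/*
 * Toy set keyed by object number; set_remove() returns ENOENT when the
 * entry was never there, mirroring zap_remove_int().
 */
#define SET_MAX 8
static unsigned long long set[SET_MAX];
static int set_count;

static int
set_remove(unsigned long long obj)
{
	for (int i = 0; i < set_count; i++) {
		if (set[i] == obj) {
			set[i] = set[--set_count];
			return (0);
		}
	}
	return (ENOENT);
}

/* tolerate a missing entry, but never an unexpected failure or overgrowth */
static void
remove_from_next_clones_like(unsigned long long obj, int num_children)
{
	int err = set_remove(obj);

	if (err != ENOENT)
		assert(err == 0);
	assert(set_count <= num_children - 2);
}

int
main(void)
{
	set[set_count++] = 7;
	remove_from_next_clones_like(42, 3);  /* stale/missing entry: OK */
	remove_from_next_clones_like(7, 3);
	printf("remaining entries: %d\n", set_count);
	return (0);
}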
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
@@ -1495,8 +1550,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
if (after_branch_point &&
ds_prev->ds_phys->ds_next_clones_obj != 0) {
- VERIFY(0 == zap_remove_int(mos,
- ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
+ remove_from_next_clones(ds_prev, obj, tx);
if (ds->ds_phys->ds_next_snap_obj != 0) {
VERIFY(0 == zap_add_int(mos,
ds_prev->ds_phys->ds_next_clones_obj,
@@ -1852,8 +1906,8 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
ds->ds_prev->ds_phys->ds_creation_txg);
ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
} else if (next_clones_obj != 0) {
- VERIFY3U(0, ==, zap_remove_int(mos,
- next_clones_obj, dsphys->ds_next_snap_obj, tx));
+ remove_from_next_clones(ds->ds_prev,
+ dsphys->ds_next_snap_obj, tx);
VERIFY3U(0, ==, zap_add_int(mos,
next_clones_obj, dsobj, tx));
}
@@ -1962,6 +2016,9 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
if (ds->ds_phys->ds_next_snap_obj) {
stat->dds_is_snapshot = B_TRUE;
stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
+ } else {
+ stat->dds_is_snapshot = B_FALSE;
+ stat->dds_num_clones = 0;
}
/* clone origin is really a dsl_dir thing... */
@@ -1973,6 +2030,8 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
dsl_dataset_name(ods, stat->dds_origin);
dsl_dataset_drop_ref(ods, FTAG);
+ } else {
+ stat->dds_origin[0] = '\0';
}
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}
@@ -2439,9 +2498,7 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
/* change the origin's next clone */
if (origin_ds->ds_phys->ds_next_clones_obj) {
- VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
- origin_ds->ds_phys->ds_next_clones_obj,
- origin_ds->ds_phys->ds_next_snap_obj, tx));
+ remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
origin_ds->ds_phys->ds_next_clones_obj,
oldnext_obj, tx));
@@ -3039,12 +3096,8 @@ dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dataset_t *ds = arg1;
uint64_t *reservationp = arg2;
uint64_t new_reservation = *reservationp;
- int64_t delta;
uint64_t unique;
- if (new_reservation > INT64_MAX)
- return (EOVERFLOW);
-
if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
SPA_VERSION_REFRESERVATION)
return (ENOTSUP);
@@ -3061,15 +3114,18 @@ dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
mutex_enter(&ds->ds_lock);
unique = dsl_dataset_unique(ds);
- delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
mutex_exit(&ds->ds_lock);
- if (delta > 0 &&
- delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
- return (ENOSPC);
- if (delta > 0 && ds->ds_quota > 0 &&
- new_reservation > ds->ds_quota)
- return (ENOSPC);
+ if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) {
+ uint64_t delta = MAX(unique, new_reservation) -
+ MAX(unique, ds->ds_reserved);
+
+ if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
+ return (ENOSPC);
+ if (ds->ds_quota > 0 &&
+ new_reservation > ds->ds_quota)
+ return (ENOSPC);
+ }
return (0);
}
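Both reservation checks (dsl_dataset above, dsl_dir below) drop the signed int64 delta and the EOVERFLOW pre-check: the delta is computed only when the effective reservation actually grows, so the subtraction stays non-negative in unsigned arithmetic. A compact sketch of the check, with simplified parameters:

#include <errno.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*
 * Only when the effective reservation grows do we need free space for
 * the difference; computing the delta inside that branch keeps the
 * subtraction non-negative, so no signed type (and no overflow check)
 * is needed.
 */
static int
check_reservation(unsigned long long unique, unsigned long long cur_resv,
    unsigned long long new_resv, unsigned long long avail,
    unsigned long long quota)
{
	if (MAX(unique, new_resv) > MAX(unique, cur_resv)) {
		unsigned long long delta =
		    MAX(unique, new_resv) - MAX(unique, cur_resv);

		if (delta > avail)
			return (ENOSPC);
		if (quota > 0 && new_resv > quota)
			return (ENOSPC);
	}
	return (0);
}

int
main(void)
{
	/* shrinking (or no-op) reservations always succeed */
	printf("%d\n", check_reservation(1 << 20, 4 << 20, 2 << 20, 0, 0));
	/* growing past the available space fails */
	printf("%d (ENOSPC=%d)\n",
	    check_reservation(1 << 20, 0, 8 << 20, 1 << 20, 0), ENOSPC);
	return (0);
}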
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
index 96b5005a09ea..2f312ae3410c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -226,24 +226,11 @@ dsl_dir_namelen(dsl_dir_t *dd)
return (result);
}
-int
-dsl_dir_is_private(dsl_dir_t *dd)
-{
- int rv = FALSE;
-
- if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent))
- rv = TRUE;
- if (dataset_name_hidden(dd->dd_myname))
- rv = TRUE;
- return (rv);
-}
-
-
static int
getcomponent(const char *path, char *component, const char **nextp)
{
char *p;
- if (path == NULL)
+ if ((path == NULL) || (path[0] == '\0'))
return (ENOENT);
/* This would be a good place to reserve some namespace... */
p = strpbrk(path, "/@");
@@ -1076,10 +1063,6 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
uint64_t *reservationp = arg2;
uint64_t new_reservation = *reservationp;
uint64_t used, avail;
- int64_t delta;
-
- if (new_reservation > INT64_MAX)
- return (EOVERFLOW);
/*
* If we are doing the preliminary check in open context, the
@@ -1090,8 +1073,6 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
- delta = MAX(used, new_reservation) -
- MAX(used, dd->dd_phys->dd_reserved);
mutex_exit(&dd->dd_lock);
if (dd->dd_parent) {
@@ -1101,11 +1082,17 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
}
- if (delta > 0 && delta > avail)
- return (ENOSPC);
- if (delta > 0 && dd->dd_phys->dd_quota > 0 &&
- new_reservation > dd->dd_phys->dd_quota)
- return (ENOSPC);
+ if (MAX(used, new_reservation) > MAX(used, dd->dd_phys->dd_reserved)) {
+ uint64_t delta = MAX(used, new_reservation) -
+ MAX(used, dd->dd_phys->dd_reserved);
+
+ if (delta > avail)
+ return (ENOSPC);
+ if (dd->dd_phys->dd_quota > 0 &&
+ new_reservation > dd->dd_phys->dd_quota)
+ return (ENOSPC);
+ }
+
return (0);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
index e5823c5954d7..0f00bc965dcd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -133,14 +133,15 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
goto out;
err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj,
FTAG, &ds);
+ if (err == 0) {
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_phys->ds_prev_snap_obj, dp,
+ &dp->dp_origin_snap);
+ dsl_dataset_rele(ds, FTAG);
+ }
+ dsl_dir_close(dd, dp);
if (err)
goto out;
- err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
- dp, &dp->dp_origin_snap);
- if (err)
- goto out;
- dsl_dataset_rele(ds, FTAG);
- dsl_dir_close(dd, dp);
}
/* get scrub status */
@@ -303,23 +304,51 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
dp->dp_read_overhead = 0;
start = gethrtime();
+
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
- if (!list_link_active(&ds->ds_synced_link))
- list_insert_tail(&dp->dp_synced_datasets, ds);
- else
- dmu_buf_rele(ds->ds_dbuf, ds);
+ /*
+ * We must not sync any non-MOS datasets twice, because
+ * we may have taken a snapshot of them. However, we
+ * may sync newly-created datasets on pass 2.
+ */
+ ASSERT(!list_link_active(&ds->ds_synced_link));
+ list_insert_tail(&dp->dp_synced_datasets, ds);
dsl_dataset_sync(ds, zio, tx);
}
DTRACE_PROBE(pool_sync__1setup);
-
err = zio_wait(zio);
+
write_time = gethrtime() - start;
ASSERT(err == 0);
DTRACE_PROBE(pool_sync__2rootzio);
- while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
+ for (ds = list_head(&dp->dp_synced_datasets); ds;
+ ds = list_next(&dp->dp_synced_datasets, ds))
+ dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
+
+ /*
+ * Sync the datasets again to push out the changes due to
+ * userquota updates. This must be done before we process the
+ * sync tasks, because that could cause a snapshot of a dataset
+ * whose ds_bp will be rewritten when we do this 2nd sync.
+ */
+ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
+ ASSERT(list_link_active(&ds->ds_synced_link));
+ dmu_buf_rele(ds->ds_dbuf, ds);
+ dsl_dataset_sync(ds, zio, tx);
+ }
+ err = zio_wait(zio);
+
+ while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
+ /*
+ * No more sync tasks should have been added while we
+ * were syncing.
+ */
+ ASSERT(spa_sync_pass(dp->dp_spa) == 1);
dsl_sync_task_group_sync(dstg, tx);
+ }
DTRACE_PROBE(pool_sync__3task);
start = gethrtime();
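dsl_pool_sync() now runs in two passes: sync every dirty dataset once, run the user/group quota callbacks (which may re-dirty the datasets they account for), then sync again before any sync task runs, so a snapshot taken by a sync task sees the final ds_bp. A toy two-pass flush illustrating the ordering, with invented structures:

#include <stdio.h>

#define NDS 3

struct dataset {
	int dirty;
	int synced;      /* on the "synced" list, like ds_synced_link */
};

static void
dataset_sync(struct dataset *ds)
{
	ds->dirty = 0;
}

/* the quota callback may re-dirty the dataset it just accounted for */
static void
userquota_callback(struct dataset *ds)
{
	ds->dirty = 1;
}

static void
pool_sync(struct dataset ds[], int n)
{
	/* pass 1: every dirty dataset is synced exactly once */
	for (int i = 0; i < n; i++) {
		if (ds[i].dirty) {
			ds[i].synced = 1;
			dataset_sync(&ds[i]);
		}
	}
	/* run the accounting callbacks on everything we just synced */
	for (int i = 0; i < n; i++)
		if (ds[i].synced)
			userquota_callback(&ds[i]);
	/* pass 2: push out the changes the callbacks made */
	for (int i = 0; i < n; i++)
		if (ds[i].dirty)
			dataset_sync(&ds[i]);
	/* only now is it safe to run sync tasks (e.g. snapshots) */
}

int
main(void)
{
	struct dataset ds[NDS] = { { 1, 0 }, { 0, 0 }, { 1, 0 } };

	pool_sync(ds, NDS);
	printf("all clean: %d\n", !ds[0].dirty && !ds[2].dirty);
	return (0);
}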
@@ -574,6 +603,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
if (prev->ds_phys->ds_next_clones_obj == 0) {
+ dmu_buf_will_dirty(prev->ds_dbuf, tx);
prev->ds_phys->ds_next_clones_obj =
zap_create(dp->dp_meta_objset,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
@@ -593,8 +623,8 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap != NULL);
- (void) dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
- tx, DS_FIND_CHILDREN);
+ VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
+ tx, DS_FIND_CHILDREN));
}
void
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
index 212acbbc5968..d06493236805 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
@@ -416,6 +414,34 @@ dsl_prop_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
}
void
+dsl_props_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ nvlist_t *nvl = arg2;
+ nvpair_t *elem = NULL;
+
+ while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
+ struct prop_set_arg psa;
+
+ psa.name = nvpair_name(elem);
+
+ if (nvpair_type(elem) == DATA_TYPE_STRING) {
+ VERIFY(nvpair_value_string(elem,
+ (char **)&psa.buf) == 0);
+ psa.intsz = 1;
+ psa.numints = strlen(psa.buf) + 1;
+ } else {
+ uint64_t intval;
+ VERIFY(nvpair_value_uint64(elem, &intval) == 0);
+ psa.intsz = sizeof (intval);
+ psa.numints = 1;
+ psa.buf = &intval;
+ }
+ dsl_prop_set_sync(ds, &psa, cr, tx);
+ }
+}
+
+void
dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
cred_t *cr, dmu_tx_t *tx)
{
@@ -471,6 +497,43 @@ dsl_prop_set(const char *dsname, const char *propname,
return (err);
}
+int
+dsl_props_set(const char *dsname, nvlist_t *nvl)
+{
+ dsl_dataset_t *ds;
+ nvpair_t *elem = NULL;
+ int err;
+
+ /*
+ * Do these checks before the syncfunc, since it can't fail.
+ */
+ while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
+ if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN)
+ return (ENAMETOOLONG);
+ if (nvpair_type(elem) == DATA_TYPE_STRING) {
+ char *valstr;
+ VERIFY(nvpair_value_string(elem, &valstr) == 0);
+ if (strlen(valstr) >= ZAP_MAXVALUELEN)
+ return (E2BIG);
+ }
+ }
+
+ if (err = dsl_dataset_hold(dsname, FTAG, &ds))
+ return (err);
+
+ if (dsl_dataset_is_snapshot(ds) &&
+ spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_SNAP_PROPS) {
+ dsl_dataset_rele(ds, FTAG);
+ return (ENOTSUP);
+ }
+
+ err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+ NULL, dsl_props_set_sync, ds, nvl, 2);
+
+ dsl_dataset_rele(ds, FTAG);
+ return (err);
+}
+
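Because dsl_props_set_sync() runs in a sync task and cannot return an error, dsl_props_set() validates the whole batch in open context first (name length, string-value length, snapshot-property support) and only then schedules a single sync task to apply everything. A generic validate-then-commit sketch over a plain array (the length limits stand in for ZAP_MAXNAMELEN/ZAP_MAXVALUELEN):

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define NAME_MAX_LEN  256      /* illustrative limits */
#define VALUE_MAX_LEN 1024

struct prop {
	const char *name;
	const char *strval;     /* NULL for numeric properties */
};

/* open context: this is the only place an error can be returned */
static int
props_validate(const struct prop *p, int n)
{
	for (int i = 0; i < n; i++) {
		if (strlen(p[i].name) >= NAME_MAX_LEN)
			return (ENAMETOOLONG);
		if (p[i].strval != NULL &&
		    strlen(p[i].strval) >= VALUE_MAX_LEN)
			return (E2BIG);
	}
	return (0);
}

/* sync context: must not fail, so it just applies the batch */
static void
props_set_sync(const struct prop *p, int n)
{
	for (int i = 0; i < n; i++)
		printf("set %s\n", p[i].name);
}

int
main(void)
{
	struct prop batch[] = {
		{ "compression", "on" },
		{ "quota", NULL },
	};
	int err = props_validate(batch, 2);

	if (err == 0)
		props_set_sync(batch, 2);
	return (err);
}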
/*
* Iterate over all properties for this dataset and return them in an nvlist.
*/
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
index 84561ab82874..d11f106f7b6e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -45,6 +45,8 @@ typedef int (scrub_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scrub_cb_t dsl_pool_scrub_clean_cb;
static dsl_syncfunc_t dsl_pool_scrub_cancel_sync;
+static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+ uint64_t objset, uint64_t object);
int zfs_scrub_min_time = 1; /* scrub for at least 1 sec each txg */
int zfs_resilver_min_time = 3; /* resilver for at least 3 sec each txg */
@@ -95,6 +97,9 @@ dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
ESC_ZFS_RESILVER_START);
dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg,
tx->tx_txg);
+ } else {
+ spa_event_notify(dp->dp_spa, NULL,
+ ESC_ZFS_SCRUB_START);
}
/* zero out the scrub stats in all vdev_stat_t's */
@@ -212,8 +217,9 @@ dsl_pool_scrub_cancel_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
*/
vdev_dtl_reassess(dp->dp_spa->spa_root_vdev, tx->tx_txg,
*completep ? dp->dp_scrub_max_txg : 0, B_TRUE);
- if (dp->dp_scrub_min_txg && *completep)
- spa_event_notify(dp->dp_spa, NULL, ESC_ZFS_RESILVER_FINISH);
+ if (*completep)
+ spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ?
+ ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
spa_errlog_rotate(dp->dp_spa);
/*
@@ -402,7 +408,7 @@ traverse_zil(dsl_pool_t *dp, zil_header_t *zh)
* We only want to visit blocks that have been claimed but not yet
* replayed (or, in read-only mode, blocks that *would* be claimed).
*/
- if (claim_txg == 0 && (spa_mode & FWRITE))
+ if (claim_txg == 0 && spa_writeable(dp->dp_spa))
return;
zilog = zil_alloc(dp->dp_meta_objset, zh);
@@ -420,9 +426,6 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
int err;
arc_buf_t *buf = NULL;
- if (bp->blk_birth == 0)
- return;
-
if (bp->blk_birth <= dp->dp_scrub_min_txg)
return;
@@ -482,7 +485,7 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_WAIT;
dnode_phys_t *child_dnp;
- int i, j;
+ int i;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
err = arc_read(NULL, dp->dp_spa, bp, pbuf,
@@ -497,20 +500,12 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
child_dnp = buf->b_data;
for (i = 0; i < epb; i++, child_dnp++) {
- for (j = 0; j < child_dnp->dn_nblkptr; j++) {
- zbookmark_t czb;
-
- SET_BOOKMARK(&czb, zb->zb_objset,
- zb->zb_blkid * epb + i,
- child_dnp->dn_nlevels - 1, j);
- scrub_visitbp(dp, child_dnp, buf,
- &child_dnp->dn_blkptr[j], &czb);
- }
+ scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset,
+ zb->zb_blkid * epb + i);
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
- int j;
err = arc_read_nolock(NULL, dp->dp_spa, bp,
arc_getbuf_func, &buf,
@@ -526,13 +521,13 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
traverse_zil(dp, &osp->os_zil_header);
- for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
- zbookmark_t czb;
-
- SET_BOOKMARK(&czb, zb->zb_objset, 0,
- osp->os_meta_dnode.dn_nlevels - 1, j);
- scrub_visitbp(dp, &osp->os_meta_dnode, buf,
- &osp->os_meta_dnode.dn_blkptr[j], &czb);
+ scrub_visitdnode(dp, &osp->os_meta_dnode,
+ buf, zb->zb_objset, 0);
+ if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+ scrub_visitdnode(dp, &osp->os_userused_dnode,
+ buf, zb->zb_objset, 0);
+ scrub_visitdnode(dp, &osp->os_groupused_dnode,
+ buf, zb->zb_objset, 0);
}
}
@@ -542,6 +537,21 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
}
static void
+scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+ uint64_t objset, uint64_t object)
+{
+ int j;
+
+ for (j = 0; j < dnp->dn_nblkptr; j++) {
+ zbookmark_t czb;
+
+ SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+ scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
+ }
+
+}
+
+static void
scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp)
{
zbookmark_t zb;
@@ -688,17 +698,34 @@ scrub_visitds(dsl_pool_t *dp, uint64_t dsobj, dmu_tx_t *tx)
ds->ds_phys->ds_next_snap_obj, tx) == 0);
}
if (ds->ds_phys->ds_num_children > 1) {
- if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
+ boolean_t usenext = B_FALSE;
+ if (ds->ds_phys->ds_next_clones_obj != 0) {
+ uint64_t count;
+ /*
+ * A bug in a previous version of the code could
+ * cause upgrade_clones_cb() to not set
+ * ds_next_snap_obj when it should, leading to a
+ * missing entry. Therefore we can only use the
+ * next_clones_obj when its count is correct.
+ */
+ int err = zap_count(dp->dp_meta_objset,
+ ds->ds_phys->ds_next_clones_obj, &count);
+ if (err == 0 &&
+ count == ds->ds_phys->ds_num_children - 1)
+ usenext = B_TRUE;
+ }
+
+ if (usenext) {
+ VERIFY(zap_join(dp->dp_meta_objset,
+ ds->ds_phys->ds_next_clones_obj,
+ dp->dp_scrub_queue_obj, tx) == 0);
+ } else {
struct enqueue_clones_arg eca;
eca.tx = tx;
eca.originobj = ds->ds_object;
(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
- } else {
- VERIFY(zap_join(dp->dp_meta_objset,
- ds->ds_phys->ds_next_clones_obj,
- dp->dp_scrub_queue_obj, tx) == 0);
}
}
@@ -751,6 +778,7 @@ enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
void
dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
{
+ spa_t *spa = dp->dp_spa;
zap_cursor_t zc;
zap_attribute_t za;
boolean_t complete = B_TRUE;
@@ -758,8 +786,10 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
return;
- /* If the spa is not fully loaded, don't bother. */
- if (dp->dp_spa->spa_load_state != SPA_LOAD_NONE)
+ /*
+ * If the pool is not loaded, or is trying to unload, leave it alone.
+ */
+ if (spa->spa_load_state != SPA_LOAD_NONE || spa_shutting_down(spa))
return;
if (dp->dp_scrub_restart) {
@@ -768,13 +798,13 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_pool_scrub_setup_sync(dp, &func, kcred, tx);
}
- if (dp->dp_spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) {
+ if (spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) {
/*
* We must have resumed after rebooting; reset the vdev
* stats to know that we're doing a scrub (although it
* will think we're just starting now).
*/
- vdev_scrub_stat_update(dp->dp_spa->spa_root_vdev,
+ vdev_scrub_stat_update(spa->spa_root_vdev,
dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER :
POOL_SCRUB_EVERYTHING, B_FALSE);
}
@@ -782,7 +812,7 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
dp->dp_scrub_pausing = B_FALSE;
dp->dp_scrub_start_time = lbolt64;
dp->dp_scrub_isresilver = (dp->dp_scrub_min_txg != 0);
- dp->dp_spa->spa_scrub_active = B_TRUE;
+ spa->spa_scrub_active = B_TRUE;
if (dp->dp_scrub_bookmark.zb_objset == 0) {
/* First do the MOS & ORIGIN */
@@ -790,8 +820,8 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
if (dp->dp_scrub_pausing)
goto out;
- if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
- VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
+ if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) {
+ VERIFY(0 == dmu_objset_find_spa(spa,
NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
scrub_visitds(dp, dp->dp_origin_snap->ds_object, tx);
@@ -841,15 +871,13 @@ out:
VERIFY(0 == zap_update(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
- &dp->dp_spa->spa_scrub_errors, tx));
+ &spa->spa_scrub_errors, tx));
/* XXX this is scrub-clean specific */
- mutex_enter(&dp->dp_spa->spa_scrub_lock);
- while (dp->dp_spa->spa_scrub_inflight > 0) {
- cv_wait(&dp->dp_spa->spa_scrub_io_cv,
- &dp->dp_spa->spa_scrub_lock);
- }
- mutex_exit(&dp->dp_spa->spa_scrub_lock);
+ mutex_enter(&spa->spa_scrub_lock);
+ while (spa->spa_scrub_inflight > 0)
+ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+ mutex_exit(&spa->spa_scrub_lock);
}
void
@@ -931,13 +959,17 @@ static int
dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
const blkptr_t *bp, const zbookmark_t *zb)
{
- size_t size = BP_GET_LSIZE(bp);
- int d;
+ size_t size = BP_GET_PSIZE(bp);
spa_t *spa = dp->dp_spa;
boolean_t needs_io;
- int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL;
+ int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
int zio_priority;
+ ASSERT(bp->blk_birth > dp->dp_scrub_min_txg);
+
+ if (bp->blk_birth >= dp->dp_scrub_max_txg)
+ return (0);
+
count_block(dp->dp_blkstats, bp);
if (dp->dp_scrub_isresilver == 0) {
@@ -956,7 +988,7 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
zio_flags |= ZIO_FLAG_SPECULATIVE;
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
+ for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
vdev_t *vd = vdev_lookup_top(spa,
DVA_GET_VDEV(&bp->blk_dva[d]));
@@ -974,16 +1006,17 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
if (DVA_GET_GANG(&bp->blk_dva[d])) {
/*
* Gang members may be spread across multiple
- * vdevs, so the best we can do is look at the
- * pool-wide DTL.
+ * vdevs, so the best estimate we have is the
+ * scrub range, which has already been checked.
* XXX -- it would be better to change our
- * allocation policy to ensure that this can't
- * happen.
+ * allocation policy to ensure that all
+ * gang members reside on the same vdev.
*/
- vd = spa->spa_root_vdev;
+ needs_io = B_TRUE;
+ } else {
+ needs_io = vdev_dtl_contains(vd, DTL_PARTIAL,
+ bp->blk_birth, 1);
}
- needs_io = vdev_dtl_contains(&vd->vdev_dtl_map,
- bp->blk_birth, 1);
}
}
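The per-dnode block-pointer loop that scrub_visitbp() used to repeat for child dnodes and for the objset's meta-dnode is factored into scrub_visitdnode(), which also lets the scrub walk the new userused/groupused dnodes when the objset block is large enough to contain them. A stripped-down visitor sketch of that refactor:

#include <stdio.h>

#define NBLKPTR 3

struct dnode {
	int nblkptr;
	unsigned long long blkptr[NBLKPTR];   /* 0 == hole */
};

static void
visit_bp(unsigned long long bp)
{
	if (bp != 0)
		printf("visit block %llu\n", bp);
}

/* one helper replaces the two hand-rolled loops in scrub_visitbp() */
static void
visit_dnode(const struct dnode *dn)
{
	for (int j = 0; j < dn->nblkptr; j++)
		visit_bp(dn->blkptr[j]);
}

int
main(void)
{
	struct dnode meta = { 3, { 11, 0, 13 } };
	struct dnode userused = { 1, { 21, 0, 0 } };

	visit_dnode(&meta);
	visit_dnode(&userused);   /* only visited on new-format objset blocks */
	return (0);
}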
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
index 47f8f5fdafb3..d216154db04d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -36,18 +36,35 @@ uint64_t metaslab_aliquot = 512ULL << 10;
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
+ * Minimum size which forces the dynamic allocator to change
+ * its allocation strategy. Once the space map cannot satisfy
+ * an allocation of this size, it switches to a more aggressive
+ * strategy (i.e. search by size rather than offset).
+ */
+uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
+
+/*
+ * The minimum free space, in percent, which must be available
+ * in a space map to continue allocations in a first-fit fashion.
+ * Once the space_map's free space drops below this level we dynamically
+ * switch to using best-fit allocations.
+ */
+int metaslab_df_free_pct = 30;
+
+/*
* ==========================================================================
* Metaslab classes
* ==========================================================================
*/
metaslab_class_t *
-metaslab_class_create(void)
+metaslab_class_create(space_map_ops_t *ops)
{
metaslab_class_t *mc;
mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
mc->mc_rotor = NULL;
+ mc->mc_ops = ops;
return (mc);
}
@@ -202,30 +219,14 @@ metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
}
/*
- * ==========================================================================
- * The first-fit block allocator
- * ==========================================================================
+ * This is a helper function that can be used by the allocator to find
+ * a suitable block to allocate. This will search the specified AVL
+ * tree looking for a block that matches the specified criteria.
*/
-static void
-metaslab_ff_load(space_map_t *sm)
-{
- ASSERT(sm->sm_ppd == NULL);
- sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
-}
-
-static void
-metaslab_ff_unload(space_map_t *sm)
-{
- kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
- sm->sm_ppd = NULL;
-}
-
static uint64_t
-metaslab_ff_alloc(space_map_t *sm, uint64_t size)
+metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
+ uint64_t align)
{
- avl_tree_t *t = &sm->sm_root;
- uint64_t align = size & -size;
- uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
space_seg_t *ss, ssearch;
avl_index_t where;
@@ -254,7 +255,37 @@ metaslab_ff_alloc(space_map_t *sm, uint64_t size)
return (-1ULL);
*cursor = 0;
- return (metaslab_ff_alloc(sm, size));
+ return (metaslab_block_picker(t, cursor, size, align));
+}
+
+/*
+ * ==========================================================================
+ * The first-fit block allocator
+ * ==========================================================================
+ */
+static void
+metaslab_ff_load(space_map_t *sm)
+{
+ ASSERT(sm->sm_ppd == NULL);
+ sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
+ sm->sm_pp_root = NULL;
+}
+
+static void
+metaslab_ff_unload(space_map_t *sm)
+{
+ kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
+ sm->sm_ppd = NULL;
+}
+
+static uint64_t
+metaslab_ff_alloc(space_map_t *sm, uint64_t size)
+{
+ avl_tree_t *t = &sm->sm_root;
+ uint64_t align = size & -size;
+ uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
+
+ return (metaslab_block_picker(t, cursor, size, align));
}
/* ARGSUSED */
@@ -276,9 +307,136 @@ static space_map_ops_t metaslab_ff_ops = {
metaslab_ff_unload,
metaslab_ff_alloc,
metaslab_ff_claim,
- metaslab_ff_free
+ metaslab_ff_free,
+ NULL /* maxsize */
+};
+
+/*
+ * Dynamic block allocator -
+ * Uses the first fit allocation scheme until space gets low and then
+ * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
+ * and metaslab_df_free_pct to determine when to switch the allocation scheme.
+ */
+
+uint64_t
+metaslab_df_maxsize(space_map_t *sm)
+{
+ avl_tree_t *t = sm->sm_pp_root;
+ space_seg_t *ss;
+
+ if (t == NULL || (ss = avl_last(t)) == NULL)
+ return (0ULL);
+
+ return (ss->ss_end - ss->ss_start);
+}
+
+static int
+metaslab_df_seg_compare(const void *x1, const void *x2)
+{
+ const space_seg_t *s1 = x1;
+ const space_seg_t *s2 = x2;
+ uint64_t ss_size1 = s1->ss_end - s1->ss_start;
+ uint64_t ss_size2 = s2->ss_end - s2->ss_start;
+
+ if (ss_size1 < ss_size2)
+ return (-1);
+ if (ss_size1 > ss_size2)
+ return (1);
+
+ if (s1->ss_start < s2->ss_start)
+ return (-1);
+ if (s1->ss_start > s2->ss_start)
+ return (1);
+
+ return (0);
+}
+
+static void
+metaslab_df_load(space_map_t *sm)
+{
+ space_seg_t *ss;
+
+ ASSERT(sm->sm_ppd == NULL);
+ sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
+
+ sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
+ avl_create(sm->sm_pp_root, metaslab_df_seg_compare,
+ sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));
+
+ for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
+ avl_add(sm->sm_pp_root, ss);
+}
+
+static void
+metaslab_df_unload(space_map_t *sm)
+{
+ void *cookie = NULL;
+
+ kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
+ sm->sm_ppd = NULL;
+
+ while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) {
+ /* tear down the tree */
+ }
+
+ avl_destroy(sm->sm_pp_root);
+ kmem_free(sm->sm_pp_root, sizeof (avl_tree_t));
+ sm->sm_pp_root = NULL;
+}
+
+static uint64_t
+metaslab_df_alloc(space_map_t *sm, uint64_t size)
+{
+ avl_tree_t *t = &sm->sm_root;
+ uint64_t align = size & -size;
+ uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
+ uint64_t max_size = metaslab_df_maxsize(sm);
+ int free_pct = sm->sm_space * 100 / sm->sm_size;
+
+ ASSERT(MUTEX_HELD(sm->sm_lock));
+ ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
+
+ if (max_size < size)
+ return (-1ULL);
+
+ /*
+ * If we're running low on space switch to using the size
+ * sorted AVL tree (best-fit).
+ */
+ if (max_size < metaslab_df_alloc_threshold ||
+ free_pct < metaslab_df_free_pct) {
+ t = sm->sm_pp_root;
+ *cursor = 0;
+ }
+
+ return (metaslab_block_picker(t, cursor, size, 1ULL));
+}
+
+/* ARGSUSED */
+static void
+metaslab_df_claim(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ /* No need to update cursor */
+}
+
+/* ARGSUSED */
+static void
+metaslab_df_free(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ /* No need to update cursor */
+}
+
+static space_map_ops_t metaslab_df_ops = {
+ metaslab_df_load,
+ metaslab_df_unload,
+ metaslab_df_alloc,
+ metaslab_df_claim,
+ metaslab_df_free,
+ metaslab_df_maxsize
};
+space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops;
+
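The new dynamic (df) allocator keeps a second AVL tree sorted by segment size: while the largest free segment is at least metaslab_df_alloc_threshold and free space is at least metaslab_df_free_pct percent, it allocates first-fit by offset; once either drops below its limit it switches to best-fit out of the size-sorted tree. A toy version over a flat array of segments (the real code keeps per-alignment cursors and AVL trees, omitted here):

#include <stdio.h>

struct seg { unsigned long long start, end; };

static unsigned long long
seg_size(const struct seg *s)
{
	return (s->end - s->start);
}

/*
 * While the map still has a big segment and plenty of free space, take
 * the first segment that fits (by offset); otherwise take the smallest
 * one that fits.
 */
static const struct seg *
df_alloc(const struct seg *segs, int n, unsigned long long size,
    unsigned long long threshold, int free_pct, int min_free_pct)
{
	unsigned long long max_size = 0;
	const struct seg *best = NULL;

	for (int i = 0; i < n; i++)
		if (seg_size(&segs[i]) > max_size)
			max_size = seg_size(&segs[i]);
	if (max_size < size)
		return (NULL);                          /* like -1ULL */

	if (max_size >= threshold && free_pct >= min_free_pct) {
		for (int i = 0; i < n; i++)             /* first fit */
			if (seg_size(&segs[i]) >= size)
				return (&segs[i]);
	}
	for (int i = 0; i < n; i++)                     /* best fit */
		if (seg_size(&segs[i]) >= size &&
		    (best == NULL || seg_size(&segs[i]) < seg_size(best)))
			best = &segs[i];
	return (best);
}

int
main(void)
{
	struct seg segs[] = { { 0, 1 << 20 }, { 1 << 21, (1 << 21) + 4096 } };
	const struct seg *s;

	s = df_alloc(segs, 2, 4096, 128 << 10, 80, 30);   /* first fit */
	printf("first-fit start=%llu\n", s->start);
	s = df_alloc(segs, 2, 4096, 2 << 20, 80, 30);     /* best fit */
	printf("best-fit start=%llu\n", s->start);
	return (0);
}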
/*
* ==========================================================================
* Metaslabs
@@ -414,20 +572,28 @@ metaslab_weight(metaslab_t *msp)
}
static int
-metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
+metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
{
space_map_t *sm = &msp->ms_map;
+ space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
ASSERT(MUTEX_HELD(&msp->ms_lock));
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
- int error = space_map_load(sm, &metaslab_ff_ops,
- SM_FREE, &msp->ms_smo,
+ int error = space_map_load(sm, sm_ops, SM_FREE, &msp->ms_smo,
msp->ms_group->mg_vd->vdev_spa->spa_meta_objset);
if (error) {
metaslab_group_sort(msp->ms_group, msp, 0);
return (error);
}
+
+ /*
+ * If we were able to load the map then make sure
+ * that this map is still able to satisfy our request.
+ */
+ if (msp->ms_weight < size)
+ return (ENOSPC);
+
metaslab_group_sort(msp->ms_group, msp,
msp->ms_weight | activation_weight);
}
@@ -636,11 +802,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
int i;
activation_weight = METASLAB_WEIGHT_PRIMARY;
- for (i = 0; i < d; i++)
- if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id)
+ for (i = 0; i < d; i++) {
+ if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
activation_weight = METASLAB_WEIGHT_SECONDARY;
+ break;
+ }
+ }
for (;;) {
+ boolean_t was_active;
+
mutex_enter(&mg->mg_lock);
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
if (msp->ms_weight < size) {
@@ -648,6 +819,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
return (-1ULL);
}
+ was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
break;
@@ -673,7 +845,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
* another thread may have changed the weight while we
* were blocked on the metaslab lock.
*/
- if (msp->ms_weight < size) {
+ if (msp->ms_weight < size || (was_active &&
+ !(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
+ activation_weight == METASLAB_WEIGHT_PRIMARY)) {
mutex_exit(&msp->ms_lock);
continue;
}
@@ -686,7 +860,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
continue;
}
- if (metaslab_activate(msp, activation_weight) != 0) {
+ if (metaslab_activate(msp, activation_weight, size) != 0) {
mutex_exit(&msp->ms_lock);
continue;
}
@@ -720,6 +894,8 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
vdev_t *vd;
int dshift = 3;
int all_zero;
+ int zio_lock = B_FALSE;
+ boolean_t allocatable;
uint64_t offset = -1ULL;
uint64_t asize;
uint64_t distance;
@@ -778,11 +954,20 @@ top:
all_zero = B_TRUE;
do {
vd = mg->mg_vd;
+
/*
* Don't allocate from faulted devices.
*/
- if (!vdev_allocatable(vd))
+ if (zio_lock) {
+ spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER);
+ allocatable = vdev_allocatable(vd);
+ spa_config_exit(spa, SCL_ZIO, FTAG);
+ } else {
+ allocatable = vdev_allocatable(vd);
+ }
+ if (!allocatable)
goto next;
+
/*
* Avoid writing single-copy data to a failing vdev
*/
@@ -858,6 +1043,12 @@ next:
goto top;
}
+ if (!allocatable && !zio_lock) {
+ dshift = 3;
+ zio_lock = B_TRUE;
+ goto top;
+ }
+
bzero(&dva[d], sizeof (dva_t));
return (ENOSPC);
@@ -938,7 +1129,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
mutex_enter(&msp->ms_lock);
- error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
+ error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
if (error || txg == 0) { /* txg == 0 indicates dry run */
mutex_exit(&msp->ms_lock);
return (error);
@@ -946,7 +1137,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
space_map_claim(&msp->ms_map, offset, size);
- if (spa_mode & FWRITE) { /* don't dirty if we're zdb(1M) */
+ if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
vdev_dirty(vd, VDD_METASLAB, msp, txg);
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index b8925e36e241..cb6f413c640b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -70,16 +70,44 @@ TUNABLE_INT("vfs.zfs.check_hostid", &check_hostid);
SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RW, &check_hostid, 0,
"Check hostid on import?");
-int zio_taskq_threads[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
- /* ISSUE INTR */
- { 1, 1 }, /* ZIO_TYPE_NULL */
- { 1, 8 }, /* ZIO_TYPE_READ */
- { 8, 1 }, /* ZIO_TYPE_WRITE */
- { 1, 1 }, /* ZIO_TYPE_FREE */
- { 1, 1 }, /* ZIO_TYPE_CLAIM */
- { 1, 1 }, /* ZIO_TYPE_IOCTL */
+enum zti_modes {
+ zti_mode_fixed, /* value is # of threads (min 1) */
+ zti_mode_online_percent, /* value is % of online CPUs */
+ zti_mode_tune, /* fill from zio_taskq_tune_* */
+ zti_nmodes
};
+#define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) }
+#define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) }
+#define ZTI_THREAD_TUNE { zti_mode_tune, 0 }
+
+#define ZTI_THREAD_ONE ZTI_THREAD_FIX(1)
+
+typedef struct zio_taskq_info {
+ const char *zti_name;
+ struct {
+ enum zti_modes zti_mode;
+ uint_t zti_value;
+ } zti_nthreads[ZIO_TASKQ_TYPES];
+} zio_taskq_info_t;
+
+static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
+ "issue", "intr"
+};
+
+const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = {
+ /* ISSUE INTR */
+ { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
+ { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } },
+ { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } },
+ { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
+ { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
+ { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
+};
+
+enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
+uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */
+
static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
@@ -117,38 +145,38 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
- uint64_t size = spa_get_space(spa);
- uint64_t used = spa_get_alloc(spa);
+ uint64_t size;
+ uint64_t used;
uint64_t cap, version;
zprop_source_t src = ZPROP_SRC_NONE;
spa_config_dirent_t *dp;
ASSERT(MUTEX_HELD(&spa->spa_props_lock));
- /*
- * readonly properties
- */
- spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
- spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
- spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src);
- spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, size - used, src);
-
- cap = (size == 0) ? 0 : (used * 100 / size);
- spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
+ if (spa->spa_root_vdev != NULL) {
+ size = spa_get_space(spa);
+ used = spa_get_alloc(spa);
+ spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
+ spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
+ spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src);
+ spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL,
+ size - used, src);
+
+ cap = (size == 0) ? 0 : (used * 100 / size);
+ spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
+
+ spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
+ spa->spa_root_vdev->vdev_state, src);
+
+ version = spa_version(spa);
+ if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
+ src = ZPROP_SRC_DEFAULT;
+ else
+ src = ZPROP_SRC_LOCAL;
+ spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
+ }
spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
- spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
- spa->spa_root_vdev->vdev_state, src);
-
- /*
- * settable properties that are not stored in the pool property object.
- */
- version = spa_version(spa);
- if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
- src = ZPROP_SRC_DEFAULT;
- else
- src = ZPROP_SRC_LOCAL;
- spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
if (spa->spa_root != NULL)
spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
@@ -313,6 +341,11 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
break;
case ZPOOL_PROP_BOOTFS:
+ /*
+ * If the pool version is less than SPA_VERSION_BOOTFS,
+ * or the pool is still being created (version == 0),
+ * the bootfs property cannot be set.
+ */
if (spa_version(spa) < SPA_VERSION_BOOTFS) {
error = ENOTSUP;
break;
@@ -419,16 +452,60 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
return (error);
}
+void
+spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
+{
+ char *cachefile;
+ spa_config_dirent_t *dp;
+
+ if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
+ &cachefile) != 0)
+ return;
+
+ dp = kmem_alloc(sizeof (spa_config_dirent_t),
+ KM_SLEEP);
+
+ if (cachefile[0] == '\0')
+ dp->scd_path = spa_strdup(spa_config_path);
+ else if (strcmp(cachefile, "none") == 0)
+ dp->scd_path = NULL;
+ else
+ dp->scd_path = spa_strdup(cachefile);
+
+ list_insert_head(&spa->spa_config_list, dp);
+ if (need_sync)
+ spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+}
+
int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
int error;
+ nvpair_t *elem;
+ boolean_t need_sync = B_FALSE;
+ zpool_prop_t prop;
if ((error = spa_prop_validate(spa, nvp)) != 0)
return (error);
- return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
- spa, nvp, 3));
+ elem = NULL;
+ while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
+ if ((prop = zpool_name_to_prop(
+ nvpair_name(elem))) == ZPROP_INVAL)
+ return (EINVAL);
+
+ if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
+ continue;
+
+ need_sync = B_TRUE;
+ break;
+ }
+
+ if (need_sync)
+ return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
+ spa, nvp, 3));
+ else
+ return (0);
}
/*
@@ -493,21 +570,57 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
* Activate an uninitialized pool.
*/
static void
-spa_activate(spa_t *spa)
+spa_activate(spa_t *spa, int mode)
{
-
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
spa->spa_state = POOL_STATE_ACTIVE;
+ spa->spa_mode = mode;
- spa->spa_normal_class = metaslab_class_create();
- spa->spa_log_class = metaslab_class_create();
+ spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops);
+ spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops);
for (int t = 0; t < ZIO_TYPES; t++) {
+ const zio_taskq_info_t *ztip = &zio_taskqs[t];
for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
- spa->spa_zio_taskq[t][q] = taskq_create("spa_zio",
- zio_taskq_threads[t][q], maxclsyspri, 50,
- INT_MAX, TASKQ_PREPOPULATE);
+ enum zti_modes mode = ztip->zti_nthreads[q].zti_mode;
+ uint_t value = ztip->zti_nthreads[q].zti_value;
+ char name[32];
+
+ (void) snprintf(name, sizeof (name),
+ "%s_%s", ztip->zti_name, zio_taskq_types[q]);
+
+ if (mode == zti_mode_tune) {
+ mode = zio_taskq_tune_mode;
+ value = zio_taskq_tune_value;
+ if (mode == zti_mode_tune)
+ mode = zti_mode_online_percent;
+ }
+
+ switch (mode) {
+ case zti_mode_fixed:
+ ASSERT3U(value, >=, 1);
+ value = MAX(value, 1);
+
+ spa->spa_zio_taskq[t][q] = taskq_create(name,
+ value, maxclsyspri, 50, INT_MAX,
+ TASKQ_PREPOPULATE);
+ break;
+
+ case zti_mode_online_percent:
+ spa->spa_zio_taskq[t][q] = taskq_create(name,
+ value, maxclsyspri, 50, INT_MAX,
+ TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
+ break;
+
+ case zti_mode_tune:
+ default:
+ panic("unrecognized mode for "
+ "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) "
+ "in spa_activate()",
+ t, q, mode, value);
+ break;
+ }
}
}
@@ -536,7 +649,7 @@ spa_deactivate(spa_t *spa)
ASSERT(spa->spa_sync_on == B_FALSE);
ASSERT(spa->spa_dsl_pool == NULL);
ASSERT(spa->spa_root_vdev == NULL);
-
+ ASSERT(spa->spa_async_zio_root == NULL);
ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
txg_list_destroy(&spa->spa_vdev_txg_list);
@@ -642,15 +755,10 @@ spa_unload(spa_t *spa)
/*
* Wait for any outstanding async I/O to complete.
*/
- mutex_enter(&spa->spa_async_root_lock);
- while (spa->spa_async_root_count != 0)
- cv_wait(&spa->spa_async_root_cv, &spa->spa_async_root_lock);
- mutex_exit(&spa->spa_async_root_lock);
-
- /*
- * Drop and purge level 2 cache
- */
- spa_l2cache_drop(spa);
+ if (spa->spa_async_zio_root != NULL) {
+ (void) zio_wait(spa->spa_async_zio_root);
+ spa->spa_async_zio_root = NULL;
+ }
/*
* Close the dsl pool.
@@ -660,6 +768,13 @@ spa_unload(spa_t *spa)
spa->spa_dsl_pool = NULL;
}
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+
+ /*
+ * Drop and purge level 2 cache
+ */
+ spa_l2cache_drop(spa);
+
/*
* Close all vdevs.
*/
@@ -694,6 +809,8 @@ spa_unload(spa_t *spa)
spa->spa_l2cache.sav_count = 0;
spa->spa_async_suspended = 0;
+
+ spa_config_exit(spa, SCL_ALL, FTAG);
}
/*
@@ -783,6 +900,7 @@ spa_load_spares(spa_t *spa)
}
vd->vdev_top = vd;
+ vd->vdev_aux = &spa->spa_spares;
if (vdev_open(vd) != 0)
continue;
@@ -905,12 +1023,9 @@ spa_load_l2cache(spa_t *spa)
vd = oldvdevs[i];
if (vd != NULL) {
- if ((spa_mode & FWRITE) &&
- spa_l2cache_exists(vd->vdev_guid, &pool) &&
- pool != 0ULL &&
- l2arc_vdev_present(vd)) {
+ if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
+ pool != 0ULL && l2arc_vdev_present(vd))
l2arc_remove_vdev(vd);
- }
(void) vdev_close(vd);
spa_l2cache_remove(vd);
}
@@ -959,7 +1074,8 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
dmu_buf_rele(db, FTAG);
packed = kmem_alloc(nvsize, KM_SLEEP);
- error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
+ error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
+ DMU_READ_PREFETCH);
if (error == 0)
error = nvlist_unpack(packed, nvsize, value, 0);
kmem_free(packed, nvsize);
@@ -1026,8 +1142,16 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
uint64_t pool_guid;
uint64_t version;
uint64_t autoreplace = 0;
+ int orig_mode = spa->spa_mode;
char *ereport = FM_EREPORT_ZFS_POOL;
+ /*
+ * If this is an untrusted config, access the pool in read-only mode.
+ * This prevents things like resilvering recently removed devices.
+ */
+ if (!mosconfig)
+ spa->spa_mode = FREAD;
+
ASSERT(MUTEX_HELD(&spa_namespace_lock));
spa->spa_load_state = state;
@@ -1057,6 +1181,12 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
spa->spa_load_guid = pool_guid;
/*
+ * Create "The Godfather" zio to hold all async IOs
+ */
+ spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
+
+ /*
* Parse the configuration into a vdev tree. We explicitly set the
* value that will be returned by spa_version() since parsing the
* configuration requires knowing the version number.
@@ -1082,13 +1212,17 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
goto out;
/*
- * Validate the labels for all leaf vdevs. We need to grab the config
- * lock because all label I/O is done with ZIO_FLAG_CONFIG_WRITER.
+ * We need to validate the vdev labels against the configuration that
+ * we have in hand, which is dependent on the setting of mosconfig. If
+ * mosconfig is true then we're validating the vdev labels based on
+ * that config. Otherwise, we're validating against the cached config
+ * (zpool.cache) that was read when we loaded the zfs module, and then
+ * later we will recursively call spa_load() and validate against
+ * the vdev config.
*/
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
error = vdev_validate(rvd);
spa_config_exit(spa, SCL_ALL, FTAG);
-
if (error != 0)
goto out;
@@ -1192,7 +1326,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
spa_config_set(spa, newconfig);
spa_unload(spa);
spa_deactivate(spa);
- spa_activate(spa);
+ spa_activate(spa, orig_mode);
return (spa_load(spa, newconfig, state, B_TRUE));
}
@@ -1384,10 +1518,11 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
goto out;
}
- if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) {
+ if (spa_writeable(spa)) {
dmu_tx_t *tx;
int need_update = B_FALSE;
- int c;
+
+ ASSERT(state != SPA_LOAD_TRYIMPORT);
/*
* Claim log blocks that haven't been committed yet.
@@ -1410,12 +1545,15 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
/*
* If the config cache is stale, or we have uninitialized
* metaslabs (see spa_vdev_add()), then update the config.
+ *
+ * If spa_load_verbatim is true, trust the current
+ * in-core spa_config and update the disk labels.
*/
if (config_cache_txg != spa->spa_config_txg ||
- state == SPA_LOAD_IMPORT)
+ state == SPA_LOAD_IMPORT || spa->spa_load_verbatim)
need_update = B_TRUE;
- for (c = 0; c < rvd->vdev_children; c++)
+ for (int c = 0; c < rvd->vdev_children; c++)
if (rvd->vdev_child[c]->vdev_ms_array == 0)
need_update = B_TRUE;
@@ -1483,7 +1621,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
}
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
- spa_activate(spa);
+ spa_activate(spa, spa_mode_global);
error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);
@@ -1586,6 +1724,8 @@ spa_add_spares(spa_t *spa, nvlist_t *config)
uint_t vsc;
uint64_t pool;
+ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
+
if (spa->spa_spares.sav_count == 0)
return;
@@ -1633,11 +1773,11 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
vdev_stat_t *vs;
uint_t vsc;
+ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
+
if (spa->spa_l2cache.sav_count == 0)
return;
- spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
-
VERIFY(nvlist_lookup_nvlist(config,
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
@@ -1671,8 +1811,6 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
vdev_get_stats(vd, vs);
}
}
-
- spa_config_exit(spa, SCL_CONFIG, FTAG);
}
int
@@ -1684,16 +1822,27 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
*config = NULL;
error = spa_open_common(name, &spa, FTAG, config);
- if (spa && *config != NULL) {
- VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
- spa_get_errlog_size(spa)) == 0);
+ if (spa != NULL) {
+ /*
+ * This still leaves a window of inconsistency where the spares
+ * or l2cache devices could change and the config would be
+ * self-inconsistent.
+ */
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
- if (spa_suspended(spa))
+ if (*config != NULL) {
VERIFY(nvlist_add_uint64(*config,
- ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0);
+ ZPOOL_CONFIG_ERRCOUNT,
+ spa_get_errlog_size(spa)) == 0);
- spa_add_spares(spa, *config);
- spa_add_l2cache(spa, *config);
+ if (spa_suspended(spa))
+ VERIFY(nvlist_add_uint64(*config,
+ ZPOOL_CONFIG_SUSPENDED,
+ spa->spa_failmode) == 0);
+
+ spa_add_spares(spa, *config);
+ spa_add_l2cache(spa, *config);
+ }
}
/*
@@ -1715,8 +1864,10 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
}
}
- if (spa != NULL)
+ if (spa != NULL) {
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_close(spa, FTAG);
+ }
return (error);
}
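The ASSERTs added to spa_add_spares() and spa_add_l2cache(), together with the rework of spa_get_stats() above, amount to a new calling convention: the caller now holds SCL_CONFIG as reader across the whole nvlist build, so the error count, spares and l2cache entries all come from one consistent view of the config. A hedged sketch of that convention (the surrounding code is illustrative, not part of the diff):

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	spa_add_spares(spa, config);
	spa_add_l2cache(spa, config);
	spa_config_exit(spa, SCL_CONFIG, FTAG);

spa_tryimport() below is changed in exactly this way.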
@@ -1887,11 +2038,9 @@ spa_l2cache_drop(spa_t *spa)
vd = sav->sav_vdevs[i];
ASSERT(vd != NULL);
- if ((spa_mode & FWRITE) &&
- spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL &&
- l2arc_vdev_present(vd)) {
+ if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
+ pool != 0ULL && l2arc_vdev_present(vd))
l2arc_remove_vdev(vd);
- }
if (vd->vdev_isl2cache)
spa_l2cache_remove(vd);
vdev_clear_stats(vd);
@@ -1932,12 +2081,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
(void) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
spa = spa_add(pool, altroot);
- spa_activate(spa);
+ spa_activate(spa, spa_mode_global);
spa->spa_uberblock.ub_txg = txg - 1;
if (props && (error = spa_prop_validate(spa, props))) {
- spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
mutex_exit(&spa_namespace_lock);
@@ -1952,6 +2100,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_ubsync = spa->spa_uberblock;
/*
+ * Create "The Godfather" zio to hold all async IOs
+ */
+ spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
+
+ /*
* Create the root vdev.
*/
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
@@ -2069,8 +2223,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
- if (props)
+ if (props != NULL) {
+ spa_configfile_set(spa, props, B_FALSE);
spa_sync_props(spa, props, CRED(), tx);
+ }
dmu_tx_commit(tx);
@@ -2095,148 +2251,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
return (0);
}
-/*
- * Import the given pool into the system. We set up the necessary spa_t and
- * then call spa_load() to do the dirty work.
- */
-static int
-spa_import_common(const char *pool, nvlist_t *config, nvlist_t *props,
- boolean_t isroot, boolean_t allowfaulted)
-{
- spa_t *spa;
- char *altroot = NULL;
- int error, loaderr;
- nvlist_t *nvroot;
- nvlist_t **spares, **l2cache;
- uint_t nspares, nl2cache;
-
- /*
- * If a pool with this name exists, return failure.
- */
- mutex_enter(&spa_namespace_lock);
- if (spa_lookup(pool) != NULL) {
- mutex_exit(&spa_namespace_lock);
- return (EEXIST);
- }
-
- /*
- * Create and initialize the spa structure.
- */
- (void) nvlist_lookup_string(props,
- zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
- spa = spa_add(pool, altroot);
- spa_activate(spa);
-
- if (allowfaulted)
- spa->spa_import_faulted = B_TRUE;
- spa->spa_is_root = isroot;
-
- /*
- * Pass off the heavy lifting to spa_load().
- * Pass TRUE for mosconfig (unless this is a root pool) because
- * the user-supplied config is actually the one to trust when
- * doing an import.
- */
- loaderr = error = spa_load(spa, config, SPA_LOAD_IMPORT, !isroot);
-
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- /*
- * Toss any existing sparelist, as it doesn't have any validity anymore,
- * and conflicts with spa_has_spare().
- */
- if (!isroot && spa->spa_spares.sav_config) {
- nvlist_free(spa->spa_spares.sav_config);
- spa->spa_spares.sav_config = NULL;
- spa_load_spares(spa);
- }
- if (!isroot && spa->spa_l2cache.sav_config) {
- nvlist_free(spa->spa_l2cache.sav_config);
- spa->spa_l2cache.sav_config = NULL;
- spa_load_l2cache(spa);
- }
-
- VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if (error == 0)
- error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE);
- if (error == 0)
- error = spa_validate_aux(spa, nvroot, -1ULL,
- VDEV_ALLOC_L2CACHE);
- spa_config_exit(spa, SCL_ALL, FTAG);
-
- if (error != 0 || (props && (error = spa_prop_set(spa, props)))) {
- if (loaderr != 0 && loaderr != EINVAL && allowfaulted) {
- /*
- * If we failed to load the pool, but 'allowfaulted' is
- * set, then manually set the config as if the config
- * passed in was specified in the cache file.
- */
- error = 0;
- spa->spa_import_faulted = B_FALSE;
- if (spa->spa_config == NULL)
- spa->spa_config = spa_config_generate(spa,
- NULL, -1ULL, B_TRUE);
- spa_unload(spa);
- spa_deactivate(spa);
- spa_config_sync(spa, B_FALSE, B_TRUE);
- } else {
- spa_unload(spa);
- spa_deactivate(spa);
- spa_remove(spa);
- }
- mutex_exit(&spa_namespace_lock);
- return (error);
- }
-
- /*
- * Override any spares and level 2 cache devices as specified by
- * the user, as these may have correct device names/devids, etc.
- */
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- if (spa->spa_spares.sav_config)
- VERIFY(nvlist_remove(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
- else
- VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- spa_load_spares(spa);
- spa_config_exit(spa, SCL_ALL, FTAG);
- spa->spa_spares.sav_sync = B_TRUE;
- }
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
- &l2cache, &nl2cache) == 0) {
- if (spa->spa_l2cache.sav_config)
- VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
- ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
- else
- VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
- ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- spa_load_l2cache(spa);
- spa_config_exit(spa, SCL_ALL, FTAG);
- spa->spa_l2cache.sav_sync = B_TRUE;
- }
-
- if (spa_mode & FWRITE) {
- /*
- * Update the config cache to include the newly-imported pool.
- */
- spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, isroot);
- }
-
- spa->spa_import_faulted = B_FALSE;
- mutex_exit(&spa_namespace_lock);
-
- return (0);
-}
-
-#if defined(sun)
+#ifdef sun
#ifdef _KERNEL
/*
* Build a "root" vdev for a top level vdev read in from a rootpool
@@ -2372,11 +2387,11 @@ spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf)
char *cdevid, *cpath;
uint64_t tmptxg;
+ cpath = NULL;
+ cdevid = NULL;
if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH,
- &cpath) != 0)
- return (EINVAL);
- if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_DEVID,
- &cdevid) != 0)
+ &cpath) != 0 && nvlist_lookup_string(child[c],
+ ZPOOL_CONFIG_DEVID, &cdevid) != 0)
return (EINVAL);
if ((spa_check_rootconf(cpath, cdevid, NULL,
&tmptxg) == 0) && (tmptxg > txg)) {
@@ -2414,6 +2429,7 @@ spa_import_rootpool(char *devpath, char *devid)
nvlist_t *conf = NULL;
char *pname;
int error;
+ spa_t *spa;
/*
* Get the vdev pathname and configuration from the most
@@ -2429,18 +2445,24 @@ spa_import_rootpool(char *devpath, char *devid)
VERIFY(nvlist_lookup_string(conf, ZPOOL_CONFIG_POOL_NAME, &pname) == 0);
- /*
- * We specify 'allowfaulted' for this to be treated like spa_open()
- * instead of spa_import(). This prevents us from marking vdevs as
- * persistently unavailable, and generates FMA ereports as if it were a
- * pool open, not import.
- */
- error = spa_import_common(pname, conf, NULL, B_TRUE, B_TRUE);
- if (error == EEXIST)
- error = 0;
+ mutex_enter(&spa_namespace_lock);
+ if ((spa = spa_lookup(pname)) != NULL) {
+ /*
+ * Remove the existing root pool from the namespace so that we
+ * can replace it with the correct config we just read in.
+ */
+ spa_remove(spa);
+ }
+
+ spa = spa_add(pname, NULL);
+ spa->spa_is_root = B_TRUE;
+ spa->spa_load_verbatim = B_TRUE;
+
+ VERIFY(nvlist_dup(conf, &spa->spa_config, 0) == 0);
+ mutex_exit(&spa_namespace_lock);
nvlist_free(conf);
- return (error);
+ return (0);
msg_out:
cmn_err(CE_NOTE, "\n"
@@ -2453,23 +2475,170 @@ msg_out:
return (error);
}
#endif
-#endif
+#endif /* sun */
/*
- * Import a non-root pool into the system.
+ * Take a pool and insert it into the namespace as if it had been loaded at
+ * boot.
*/
int
-spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
+spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
- return (spa_import_common(pool, config, props, B_FALSE, B_FALSE));
+ spa_t *spa;
+ char *altroot = NULL;
+
+ mutex_enter(&spa_namespace_lock);
+ if (spa_lookup(pool) != NULL) {
+ mutex_exit(&spa_namespace_lock);
+ return (EEXIST);
+ }
+
+ (void) nvlist_lookup_string(props,
+ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
+ spa = spa_add(pool, altroot);
+
+ spa->spa_load_verbatim = B_TRUE;
+
+ VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
+
+ if (props != NULL)
+ spa_configfile_set(spa, props, B_FALSE);
+
+ spa_config_sync(spa, B_FALSE, B_TRUE);
+
+ mutex_exit(&spa_namespace_lock);
+
+ return (0);
}
+/*
+ * Import a non-root pool into the system.
+ */
int
-spa_import_faulted(const char *pool, nvlist_t *config, nvlist_t *props)
+spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
- return (spa_import_common(pool, config, props, B_FALSE, B_TRUE));
-}
+ spa_t *spa;
+ char *altroot = NULL;
+ int error;
+ nvlist_t *nvroot;
+ nvlist_t **spares, **l2cache;
+ uint_t nspares, nl2cache;
+
+ /*
+ * If a pool with this name exists, return failure.
+ */
+ mutex_enter(&spa_namespace_lock);
+ if ((spa = spa_lookup(pool)) != NULL) {
+ mutex_exit(&spa_namespace_lock);
+ return (EEXIST);
+ }
+
+ /*
+ * Create and initialize the spa structure.
+ */
+ (void) nvlist_lookup_string(props,
+ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
+ spa = spa_add(pool, altroot);
+ spa_activate(spa, spa_mode_global);
+
+ /*
+ * Don't start async tasks until we know everything is healthy.
+ */
+ spa_async_suspend(spa);
+
+ /*
+ * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig
+ * because the user-supplied config is actually the one to trust when
+ * doing an import.
+ */
+ error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ /*
+ * Toss any existing sparelist, as it doesn't have any validity
+ * anymore, and conflicts with spa_has_spare().
+ */
+ if (spa->spa_spares.sav_config) {
+ nvlist_free(spa->spa_spares.sav_config);
+ spa->spa_spares.sav_config = NULL;
+ spa_load_spares(spa);
+ }
+ if (spa->spa_l2cache.sav_config) {
+ nvlist_free(spa->spa_l2cache.sav_config);
+ spa->spa_l2cache.sav_config = NULL;
+ spa_load_l2cache(spa);
+ }
+
+ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ if (error == 0)
+ error = spa_validate_aux(spa, nvroot, -1ULL,
+ VDEV_ALLOC_SPARE);
+ if (error == 0)
+ error = spa_validate_aux(spa, nvroot, -1ULL,
+ VDEV_ALLOC_L2CACHE);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+
+ if (props != NULL)
+ spa_configfile_set(spa, props, B_FALSE);
+
+ if (error != 0 || (props && spa_writeable(spa) &&
+ (error = spa_prop_set(spa, props)))) {
+ spa_unload(spa);
+ spa_deactivate(spa);
+ spa_remove(spa);
+ mutex_exit(&spa_namespace_lock);
+ return (error);
+ }
+
+ spa_async_resume(spa);
+
+ /*
+ * Override any spares and level 2 cache devices as specified by
+ * the user, as these may have correct device names/devids, etc.
+ */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ if (spa->spa_spares.sav_config)
+ VERIFY(nvlist_remove(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+ else
+ VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
+ NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ spa_load_spares(spa);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ spa->spa_spares.sav_sync = B_TRUE;
+ }
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+ &l2cache, &nl2cache) == 0) {
+ if (spa->spa_l2cache.sav_config)
+ VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
+ ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
+ else
+ VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
+ NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ spa_load_l2cache(spa);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ spa->spa_l2cache.sav_sync = B_TRUE;
+ }
+
+ if (spa_writeable(spa)) {
+ /*
+ * Update the config cache to include the newly-imported pool.
+ */
+ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
+ }
+
+ mutex_exit(&spa_namespace_lock);
+
+ return (0);
+}
/*
* This (illegal) pool name is used when temporarily importing a spa_t in order
@@ -2497,7 +2666,7 @@ spa_tryimport(nvlist_t *tryconfig)
*/
mutex_enter(&spa_namespace_lock);
spa = spa_add(TRYIMPORT_NAME, NULL);
- spa_activate(spa);
+ spa_activate(spa, FREAD);
/*
* Pass off the heavy lifting to spa_load().
@@ -2553,8 +2722,10 @@ spa_tryimport(nvlist_t *tryconfig)
/*
* Add the list of hot spares and level 2 cache devices.
*/
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
spa_add_spares(spa, config);
spa_add_l2cache(spa, config);
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
}
spa_unload(spa);
@@ -2583,7 +2754,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
if (oldconfig)
*oldconfig = NULL;
- if (!(spa_mode & FWRITE))
+ if (!(spa_mode_global & FWRITE))
return (EROFS);
mutex_enter(&spa_namespace_lock);
@@ -2718,7 +2889,7 @@ int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
uint64_t txg;
- int c, error;
+ int error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd, *tvd;
nvlist_t **spares, **l2cache;
@@ -2757,7 +2928,7 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
/*
* Transfer each new top-level vdev from vd to rvd.
*/
- for (c = 0; c < vd->vdev_children; c++) {
+ for (int c = 0; c < vd->vdev_children; c++) {
tvd = vd->vdev_child[c];
vdev_remove_child(vd, tvd);
tvd->vdev_id = rvd->vdev_children;
@@ -2965,13 +3136,14 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
open_txg = txg + TXG_CONCURRENT_STATES - 1;
- mutex_enter(&newvd->vdev_dtl_lock);
- space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL,
- open_txg - TXG_INITIAL + 1);
- mutex_exit(&newvd->vdev_dtl_lock);
+ vdev_dtl_dirty(newvd, DTL_MISSING,
+ TXG_INITIAL, open_txg - TXG_INITIAL + 1);
- if (newvd->vdev_isspare)
+ if (newvd->vdev_isspare) {
spa_spare_activate(newvd);
+ spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
+ }
+
oldvdpath = spa_strdup(oldvd->vdev_path);
newvdpath = spa_strdup(newvd->vdev_path);
newvd_isspare = newvd->vdev_isspare;
@@ -3012,10 +3184,10 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
* is a replacing vdev.
*/
int
-spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
+spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
uint64_t txg;
- int c, t, error;
+ int error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd, *pvd, *cvd, *tvd;
boolean_t unspare = B_FALSE;
@@ -3035,6 +3207,22 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
pvd = vd->vdev_parent;
/*
+ * If the parent/child relationship is not as expected, don't do it.
+ * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
+ * vdev that's replacing B with C. The user's intent in replacing
+ * is to go from M(A,B) to M(A,C). If the user decides to cancel
+ * the replace by detaching C, the expected behavior is to end up
+ * M(A,B). But suppose that right after deciding to detach C,
+ * the replacement of B completes. We would have M(A,C), and then
+ * ask to detach C, which would leave us with just A -- not what
+ * the user wanted. To prevent this, we make sure that the
+ * parent/child relationship hasn't changed -- in this example,
+ * that C's parent is still the replacing vdev R.
+ */
+ if (pvd->vdev_guid != pguid && pguid != 0)
+ return (spa_vdev_exit(spa, NULL, txg, EBUSY));
+
+ /*
* If replace_done is specified, only remove this device if it's
* the first child of a replacing vdev. For the 'spare' vdev, either
* disk can be removed.
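The M(A,R(B,C)) discussion above implies a contract for callers of the new spa_vdev_detach() signature: sample the parent's guid while the topology is stable, then pass it in so the detach fails with EBUSY if the parent changed in the meantime (spa_vdev_resilver_done() further down does exactly this). A hedged sketch; detach_child_checked() and its locking are illustrative only:

	static int
	detach_child_checked(spa_t *spa, vdev_t *cvd)
	{
		uint64_t guid, pguid;

		/* Record the child and parent guids under the config lock. */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		guid = cvd->vdev_guid;
		pguid = cvd->vdev_parent->vdev_guid;
		spa_config_exit(spa, SCL_CONFIG, FTAG);

		/* EBUSY here means the parent/child relationship changed. */
		return (spa_vdev_detach(spa, guid, pguid, B_TRUE));
	}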
@@ -3060,36 +3248,13 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
/*
- * If there's only one replica, you can't detach it.
+ * If this device has the only valid copy of some data,
+ * we cannot safely detach it.
*/
- if (pvd->vdev_children <= 1)
+ if (vdev_dtl_required(vd))
return (spa_vdev_exit(spa, NULL, txg, EBUSY));
- /*
- * If all siblings have non-empty DTLs, this device may have the only
- * valid copy of the data, which means we cannot safely detach it.
- *
- * XXX -- as in the vdev_offline() case, we really want a more
- * precise DTL check.
- */
- for (c = 0; c < pvd->vdev_children; c++) {
- uint64_t dirty;
-
- cvd = pvd->vdev_child[c];
- if (cvd == vd)
- continue;
- if (vdev_is_dead(cvd))
- continue;
- mutex_enter(&cvd->vdev_dtl_lock);
- dirty = cvd->vdev_dtl_map.sm_space |
- cvd->vdev_dtl_scrub.sm_space;
- mutex_exit(&cvd->vdev_dtl_lock);
- if (!dirty)
- break;
- }
-
- if (c == pvd->vdev_children)
- return (spa_vdev_exit(spa, NULL, txg, EBUSY));
+ ASSERT(pvd->vdev_children >= 2);
/*
* If we are detaching the second disk from a replacing vdev, then
@@ -3115,7 +3280,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
* active spare list for the pool.
*/
if (pvd->vdev_ops == &vdev_spare_ops &&
- vd->vdev_id == 0)
+ vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
unspare = B_TRUE;
/*
@@ -3141,14 +3306,18 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
/*
* If we need to remove the remaining child from the list of hot spares,
- * do it now, marking the vdev as no longer a spare in the process. We
- * must do this before vdev_remove_parent(), because that can change the
- * GUID if it creates a new toplevel GUID.
+ * do it now, marking the vdev as no longer a spare in the process.
+ * We must do this before vdev_remove_parent(), because that can
+ * change the GUID if it creates a new toplevel GUID. For a similar
+ * reason, we must remove the spare now, in the same txg as the detach;
+ * otherwise someone could attach a new sibling, change the GUID, and
+ * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
*/
if (unspare) {
ASSERT(cvd->vdev_isspare);
spa_spare_remove(cvd);
unspare_guid = cvd->vdev_guid;
+ (void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
}
/*
@@ -3186,7 +3355,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
* But first make sure we're not on any *other* txg's DTL list, to
* prevent vd from being accessed after it's freed.
*/
- for (t = 0; t < TXG_SIZE; t++)
+ for (int t = 0; t < TXG_SIZE; t++)
(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
vd->vdev_detached = B_TRUE;
vdev_dirty(tvd, VDD_DTL, vd, txg);
@@ -3201,11 +3370,14 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
* list of every other pool.
*/
if (unspare) {
+ spa_t *myspa = spa;
spa = NULL;
mutex_enter(&spa_namespace_lock);
while ((spa = spa_next(spa)) != NULL) {
if (spa->spa_state != POOL_STATE_ACTIVE)
continue;
+ if (spa == myspa)
+ continue;
spa_open_ref(spa, FTAG);
mutex_exit(&spa_namespace_lock);
(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
@@ -3269,10 +3441,12 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
vdev_t *vd;
nvlist_t **spares, **l2cache, *nv;
uint_t nspares, nl2cache;
- uint64_t txg;
+ uint64_t txg = 0;
int error = 0;
+ boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
- txg = spa_vdev_enter(spa);
+ if (!locked)
+ txg = spa_vdev_enter(spa);
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
@@ -3315,7 +3489,10 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
error = ENOENT;
}
- return (spa_vdev_exit(spa, NULL, txg, error));
+ if (!locked)
+ return (spa_vdev_exit(spa, NULL, txg, error));
+
+ return (error);
}
/*
@@ -3341,13 +3518,9 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
oldvd = vd->vdev_child[0];
newvd = vd->vdev_child[1];
- mutex_enter(&newvd->vdev_dtl_lock);
- if (newvd->vdev_dtl_map.sm_space == 0 &&
- newvd->vdev_dtl_scrub.sm_space == 0) {
- mutex_exit(&newvd->vdev_dtl_lock);
+ if (vdev_dtl_empty(newvd, DTL_MISSING) &&
+ !vdev_dtl_required(oldvd))
return (oldvd);
- }
- mutex_exit(&newvd->vdev_dtl_lock);
}
/*
@@ -3357,15 +3530,12 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
newvd = vd->vdev_child[0];
oldvd = vd->vdev_child[1];
- mutex_enter(&newvd->vdev_dtl_lock);
if (newvd->vdev_unspare &&
- newvd->vdev_dtl_map.sm_space == 0 &&
- newvd->vdev_dtl_scrub.sm_space == 0) {
+ vdev_dtl_empty(newvd, DTL_MISSING) &&
+ !vdev_dtl_required(oldvd)) {
newvd->vdev_unspare = 0;
- mutex_exit(&newvd->vdev_dtl_lock);
return (oldvd);
}
- mutex_exit(&newvd->vdev_dtl_lock);
}
return (NULL);
@@ -3374,92 +3544,84 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
static void
spa_vdev_resilver_done(spa_t *spa)
{
- vdev_t *vd;
- vdev_t *pvd;
- uint64_t guid;
- uint64_t pguid = 0;
+ vdev_t *vd, *pvd, *ppvd;
+ uint64_t guid, sguid, pguid, ppguid;
- spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
+ pvd = vd->vdev_parent;
+ ppvd = pvd->vdev_parent;
guid = vd->vdev_guid;
+ pguid = pvd->vdev_guid;
+ ppguid = ppvd->vdev_guid;
+ sguid = 0;
/*
* If we have just finished replacing a hot spared device, then
* we need to detach the parent's first child (the original hot
* spare) as well.
*/
- pvd = vd->vdev_parent;
- if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
- pvd->vdev_id == 0) {
+ if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
- ASSERT(pvd->vdev_parent->vdev_children == 2);
- pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid;
+ ASSERT(ppvd->vdev_children == 2);
+ sguid = ppvd->vdev_child[1]->vdev_guid;
}
- spa_config_exit(spa, SCL_CONFIG, FTAG);
- if (spa_vdev_detach(spa, guid, B_TRUE) != 0)
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
return;
- if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0)
+ if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
return;
- spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
}
- spa_config_exit(spa, SCL_CONFIG, FTAG);
+ spa_config_exit(spa, SCL_ALL, FTAG);
}
/*
- * Update the stored path for this vdev. Dirty the vdev configuration, relying
- * on spa_vdev_enter/exit() to synchronize the labels and cache.
+ * Update the stored path or FRU for this vdev. Dirty the vdev configuration,
+ * relying on spa_vdev_enter/exit() to synchronize the labels and cache.
*/
int
-spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
+spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
+ boolean_t ispath)
{
vdev_t *vd;
uint64_t txg;
txg = spa_vdev_enter(spa);
- if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) {
- /*
- * Determine if this is a reference to a hot spare device. If
- * it is, update the path manually as there is no associated
- * vdev_t that can be synced to disk.
- */
- nvlist_t **spares;
- uint_t i, nspares;
-
- if (spa->spa_spares.sav_config != NULL) {
- VERIFY(nvlist_lookup_nvlist_array(
- spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0);
- for (i = 0; i < nspares; i++) {
- uint64_t theguid;
- VERIFY(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &theguid) == 0);
- if (theguid == guid) {
- VERIFY(nvlist_add_string(spares[i],
- ZPOOL_CONFIG_PATH, newpath) == 0);
- spa_load_spares(spa);
- spa->spa_spares.sav_sync = B_TRUE;
- return (spa_vdev_exit(spa, NULL, txg,
- 0));
- }
- }
- }
-
+ if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
return (spa_vdev_exit(spa, NULL, txg, ENOENT));
- }
if (!vd->vdev_ops->vdev_op_leaf)
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
- spa_strfree(vd->vdev_path);
- vd->vdev_path = spa_strdup(newpath);
+ if (ispath) {
+ spa_strfree(vd->vdev_path);
+ vd->vdev_path = spa_strdup(value);
+ } else {
+ if (vd->vdev_fru != NULL)
+ spa_strfree(vd->vdev_fru);
+ vd->vdev_fru = spa_strdup(value);
+ }
vdev_config_dirty(vd->vdev_top);
return (spa_vdev_exit(spa, NULL, txg, 0));
}
+int
+spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
+{
+ return (spa_vdev_set_common(spa, guid, newpath, B_TRUE));
+}
+
+int
+spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
+{
+ return (spa_vdev_set_common(spa, guid, newfru, B_FALSE));
+}
+
/*
* ==========================================================================
* SPA Scrubbing
@@ -3510,7 +3672,17 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
if (vd->vdev_remove_wanted) {
vd->vdev_remove_wanted = 0;
vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);
- vdev_clear(spa, vd);
+
+ /*
+ * We want to clear the stats, but we don't want to do a full
+ * vdev_clear() as that will cause us to throw away
+ * degraded/faulted state as well as attempt to reopen the
+ * device, all of which is a waste.
+ */
+ vd->vdev_stat.vs_read_errors = 0;
+ vd->vdev_stat.vs_write_errors = 0;
+ vd->vdev_stat.vs_checksum_errors = 0;
+
vdev_state_dirty(vd->vdev_top);
}
@@ -3789,7 +3961,6 @@ spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
zpool_prop_t prop;
const char *propname;
zprop_type_t proptype;
- spa_config_dirent_t *dp;
mutex_enter(&spa->spa_props_lock);
@@ -3822,23 +3993,8 @@ spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
case ZPOOL_PROP_CACHEFILE:
/*
- * 'cachefile' is a non-persistent property, but note
- * an async request that the config cache needs to be
- * udpated.
+ * 'cachefile' is also a non-persistent property.
*/
- VERIFY(nvpair_value_string(elem, &strval) == 0);
-
- dp = kmem_alloc(sizeof (spa_config_dirent_t), KM_SLEEP);
-
- if (strval[0] == '\0')
- dp->scd_path = spa_strdup(spa_config_path);
- else if (strcmp(strval, "none") == 0)
- dp->scd_path = NULL;
- else
- dp->scd_path = spa_strdup(strval);
-
- list_insert_head(&spa->spa_config_list, dp);
- spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
break;
default:
/*
@@ -3939,9 +4095,22 @@ spa_sync(spa_t *spa, uint64_t txg)
* into config changes that go out with this transaction group.
*/
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
- while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
- vdev_state_clean(vd);
- vdev_config_dirty(vd);
+ while (list_head(&spa->spa_state_dirty_list) != NULL) {
+ /*
+ * We need the write lock here because, for aux vdevs,
+ * calling vdev_config_dirty() modifies sav_config.
+ * This is ugly and will become unnecessary when we
+ * eliminate the aux vdev wart by integrating all vdevs
+ * into the root vdev tree.
+ */
+ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+ spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
+ while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
+ vdev_state_clean(vd);
+ vdev_config_dirty(vd);
+ }
+ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+ spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
}
spa_config_exit(spa, SCL_STATE, FTAG);
@@ -4175,7 +4344,7 @@ spa_evict_all(void)
}
vdev_t *
-spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t l2cache)
+spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
vdev_t *vd;
int i;
@@ -4183,12 +4352,18 @@ spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t l2cache)
if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
return (vd);
- if (l2cache) {
+ if (aux) {
for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
vd = spa->spa_l2cache.sav_vdevs[i];
if (vd->vdev_guid == guid)
return (vd);
}
+
+ for (i = 0; i < spa->spa_spares.sav_count; i++) {
+ vd = spa->spa_spares.sav_vdevs[i];
+ if (vd->vdev_guid == guid)
+ return (vd);
+ }
}
return (NULL);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
index 51770fc095f9..34050ef9150a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
@@ -212,6 +212,9 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ if (rootdir == NULL || !(spa_mode_global & FWRITE))
+ return;
+
/*
* Iterate over all cachefiles for the pool, past or present. When the
* cachefile is changed, the new one is pushed onto this list, allowing
@@ -386,23 +389,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
}
/*
- * For a pool that's not currently a booting rootpool, update all disk labels,
- * generate a fresh config based on the current in-core state, and sync the
- * global config cache.
- */
-void
-spa_config_update(spa_t *spa, int what)
-{
- spa_config_update_common(spa, what, FALSE);
-}
-
-/*
* Update all disk labels, generate a fresh config based on the current
* in-core state, and sync the global config cache (do not sync the config
* cache if this is a booting rootpool).
*/
void
-spa_config_update_common(spa_t *spa, int what, boolean_t isroot)
+spa_config_update(spa_t *spa, int what)
{
vdev_t *rvd = spa->spa_root_vdev;
uint64_t txg;
@@ -440,9 +432,9 @@ spa_config_update_common(spa_t *spa, int what, boolean_t isroot)
/*
* Update the global config cache to reflect the new mosconfig.
*/
- if (!isroot)
+ if (!spa->spa_is_root)
spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL);
if (what == SPA_CONFIG_UPDATE_POOL)
- spa_config_update_common(spa, SPA_CONFIG_UPDATE_VDEVS, isroot);
+ spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
index e5c395f63d2b..e1ae4917137a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Routines to manage the on-disk persistent error log.
*
@@ -61,8 +59,8 @@
* lowercase hexadecimal numbers that don't overflow.
*/
#ifdef _KERNEL
-static uint64_t
-_strtonum(char *str, char **nptr)
+uint64_t
+_strtonum(const char *str, char **nptr)
{
uint64_t val = 0;
char c;
@@ -82,7 +80,8 @@ _strtonum(char *str, char **nptr)
str++;
}
- *nptr = str;
+ if (nptr)
+ *nptr = (char *)str;
return (val);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
index de520d39e439..b403ccbcc444 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/zap.h>
@@ -127,12 +125,12 @@ spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp)
firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof);
if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread,
- buf)) != 0)
+ buf, DMU_READ_PREFETCH)) != 0)
return (err);
if (firstread != sizeof (reclen)) {
if ((err = dmu_read(mos, spa->spa_history,
shpp->sh_pool_create_len, sizeof (reclen) - firstread,
- buf + firstread)) != 0)
+ buf + firstread, DMU_READ_PREFETCH)) != 0)
return (err);
}
@@ -381,10 +379,11 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
return (0);
}
- err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf);
+ err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf,
+ DMU_READ_PREFETCH);
if (leftover && err == 0) {
err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
- leftover, buf + read_len);
+ leftover, buf + read_len, DMU_READ_PREFETCH);
}
mutex_exit(&spa->spa_history_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 5735d312921c..89e0301873cf 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -230,7 +230,7 @@ static kmutex_t spa_l2cache_lock;
static avl_tree_t spa_l2cache_avl;
kmem_cache_t *spa_buffer_pool;
-int spa_mode;
+int spa_mode_global;
#ifdef ZFS_DEBUG
/* Everything except dprintf is on by default in debug builds */
@@ -429,7 +429,6 @@ spa_add(const char *name, const char *altroot)
spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&spa->spa_async_root_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -438,7 +437,6 @@ spa_add(const char *name, const char *altroot)
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&spa->spa_async_root_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
@@ -512,12 +510,10 @@ spa_remove(spa_t *spa)
spa_config_lock_destroy(spa);
cv_destroy(&spa->spa_async_cv);
- cv_destroy(&spa->spa_async_root_cv);
cv_destroy(&spa->spa_scrub_io_cv);
cv_destroy(&spa->spa_suspend_cv);
mutex_destroy(&spa->spa_async_lock);
- mutex_destroy(&spa->spa_async_root_lock);
mutex_destroy(&spa->spa_scrub_lock);
mutex_destroy(&spa->spa_errlog_lock);
mutex_destroy(&spa->spa_errlist_lock);
@@ -884,8 +880,10 @@ spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
txg_wait_synced(spa->spa_dsl_pool, txg);
if (vd != NULL) {
- ASSERT(!vd->vdev_detached || vd->vdev_dtl.smo_object == 0);
+ ASSERT(!vd->vdev_detached || vd->vdev_dtl_smo.smo_object == 0);
+ spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
vdev_free(vd);
+ spa_config_exit(spa, SCL_ALL, spa);
}
/*
@@ -916,6 +914,15 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
spa_config_exit(spa, SCL_STATE_ALL, spa);
+ /*
+ * If anything changed, wait for it to sync. This ensures that,
+ * from the system administrator's perspective, zpool(1M) commands
+ * are synchronous. This is important for things like zpool offline:
+ * when the command completes, you expect no further I/O from ZFS.
+ */
+ if (vd != NULL)
+ txg_wait_synced(spa->spa_dsl_pool, 0);
+
return (error);
}
@@ -1118,6 +1125,37 @@ zfs_panic_recover(const char *fmt, ...)
}
/*
+ * This is a stripped-down version of strtoull, suitable only for converting
+ * lowercase hexadecimal numbers that don't overflow.
+ */
+uint64_t
+zfs_strtonum(const char *str, char **nptr)
+{
+ uint64_t val = 0;
+ char c;
+ int digit;
+
+ while ((c = *str) != '\0') {
+ if (c >= '0' && c <= '9')
+ digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ digit = 10 + c - 'a';
+ else
+ break;
+
+ val *= 16;
+ val += digit;
+
+ str++;
+ }
+
+ if (nptr)
+ *nptr = (char *)str;
+
+ return (val);
+}
+
+/*
* ==========================================================================
* Accessor functions
* ==========================================================================
@@ -1355,7 +1393,7 @@ spa_init(int mode)
avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t),
offsetof(spa_aux_t, aux_avl));
- spa_mode = mode;
+ spa_mode_global = mode;
refcount_sysinit();
unique_init();
@@ -1412,3 +1450,15 @@ spa_is_root(spa_t *spa)
{
return (spa->spa_is_root);
}
+
+boolean_t
+spa_writeable(spa_t *spa)
+{
+ return (!!(spa->spa_mode & FWRITE));
+}
+
+int
+spa_mode(spa_t *spa)
+{
+ return (spa->spa_mode);
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
index 0f247c0a5838..75b55d5c1ca7 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -116,12 +116,23 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
if (merge_before && merge_after) {
avl_remove(&sm->sm_root, ss_before);
+ if (sm->sm_pp_root) {
+ avl_remove(sm->sm_pp_root, ss_before);
+ avl_remove(sm->sm_pp_root, ss_after);
+ }
ss_after->ss_start = ss_before->ss_start;
kmem_free(ss_before, sizeof (*ss_before));
+ ss = ss_after;
} else if (merge_before) {
ss_before->ss_end = end;
+ if (sm->sm_pp_root)
+ avl_remove(sm->sm_pp_root, ss_before);
+ ss = ss_before;
} else if (merge_after) {
ss_after->ss_start = start;
+ if (sm->sm_pp_root)
+ avl_remove(sm->sm_pp_root, ss_after);
+ ss = ss_after;
} else {
ss = kmem_alloc(sizeof (*ss), KM_SLEEP);
ss->ss_start = start;
@@ -129,6 +140,9 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
avl_insert(&sm->sm_root, ss, where);
}
+ if (sm->sm_pp_root)
+ avl_add(sm->sm_pp_root, ss);
+
sm->sm_space += size;
}
@@ -163,12 +177,17 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
left_over = (ss->ss_start != start);
right_over = (ss->ss_end != end);
+ if (sm->sm_pp_root)
+ avl_remove(sm->sm_pp_root, ss);
+
if (left_over && right_over) {
newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP);
newseg->ss_start = end;
newseg->ss_end = ss->ss_end;
ss->ss_end = start;
avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER);
+ if (sm->sm_pp_root)
+ avl_add(sm->sm_pp_root, newseg);
} else if (left_over) {
ss->ss_end = start;
} else if (right_over) {
@@ -176,12 +195,16 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
} else {
avl_remove(&sm->sm_root, ss);
kmem_free(ss, sizeof (*ss));
+ ss = NULL;
}
+ if (sm->sm_pp_root && ss != NULL)
+ avl_add(sm->sm_pp_root, ss);
+
sm->sm_space -= size;
}
-int
+boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
@@ -221,59 +244,10 @@ space_map_walk(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
{
space_seg_t *ss;
- for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
- func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
-}
-
-void
-space_map_excise(space_map_t *sm, uint64_t start, uint64_t size)
-{
- avl_tree_t *t = &sm->sm_root;
- avl_index_t where;
- space_seg_t *ss, search;
- uint64_t end = start + size;
- uint64_t rm_start, rm_end;
-
ASSERT(MUTEX_HELD(sm->sm_lock));
- search.ss_start = start;
- search.ss_end = start;
-
- for (;;) {
- ss = avl_find(t, &search, &where);
-
- if (ss == NULL)
- ss = avl_nearest(t, where, AVL_AFTER);
-
- if (ss == NULL || ss->ss_start >= end)
- break;
-
- rm_start = MAX(ss->ss_start, start);
- rm_end = MIN(ss->ss_end, end);
-
- space_map_remove(sm, rm_start, rm_end - rm_start);
- }
-}
-
-/*
- * Replace smd with the union of smd and sms.
- */
-void
-space_map_union(space_map_t *smd, space_map_t *sms)
-{
- avl_tree_t *t = &sms->sm_root;
- space_seg_t *ss;
-
- ASSERT(MUTEX_HELD(smd->sm_lock));
-
- /*
- * For each source segment, remove any intersections with the
- * destination, then add the source segment to the destination.
- */
- for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) {
- space_map_excise(smd, ss->ss_start, ss->ss_end - ss->ss_start);
- space_map_add(smd, ss->ss_start, ss->ss_end - ss->ss_start);
- }
+ for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
+ func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
}
/*
@@ -337,7 +311,8 @@ space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype,
smo->smo_object, offset, size);
mutex_exit(sm->sm_lock);
- error = dmu_read(os, smo->smo_object, offset, size, entry_map);
+ error = dmu_read(os, smo->smo_object, offset, size, entry_map,
+ DMU_READ_PREFETCH);
mutex_enter(sm->sm_lock);
if (error != 0)
break;
@@ -391,6 +366,15 @@ space_map_unload(space_map_t *sm)
}
uint64_t
+space_map_maxsize(space_map_t *sm)
+{
+ if (sm->sm_loaded && sm->sm_ops != NULL)
+ return (sm->sm_ops->smop_max(sm));
+ else
+ return (-1ULL);
+}
+
+uint64_t
space_map_alloc(space_map_t *sm, uint64_t size)
{
uint64_t start;
@@ -505,3 +489,131 @@ space_map_truncate(space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx)
smo->smo_objsize = 0;
smo->smo_alloc = 0;
}
+
+/*
+ * Space map reference trees.
+ *
+ * A space map is a collection of integers. Every integer is either
+ * in the map, or it's not. A space map reference tree generalizes
+ * the idea: it allows its members to have arbitrary reference counts,
+ * as opposed to the implicit reference count of 0 or 1 in a space map.
+ * This representation comes in handy when computing the union or
+ * intersection of multiple space maps. For example, the union of
+ * N space maps is the subset of the reference tree with refcnt >= 1.
+ * The intersection of N space maps is the subset with refcnt >= N.
+ *
+ * [It's very much like a Fourier transform. Unions and intersections
+ * are hard to perform in the 'space map domain', so we convert the maps
+ * into the 'reference count domain', where it's trivial, then invert.]
+ *
+ * vdev_dtl_reassess() uses computations of this form to determine
+ * DTL_MISSING and DTL_OUTAGE for interior vdevs -- e.g. a RAID-Z vdev
+ * has an outage wherever refcnt >= vdev_nparity + 1, and a mirror vdev
+ * has an outage wherever refcnt >= vdev_children.
+ */
+static int
+space_map_ref_compare(const void *x1, const void *x2)
+{
+ const space_ref_t *sr1 = x1;
+ const space_ref_t *sr2 = x2;
+
+ if (sr1->sr_offset < sr2->sr_offset)
+ return (-1);
+ if (sr1->sr_offset > sr2->sr_offset)
+ return (1);
+
+ if (sr1 < sr2)
+ return (-1);
+ if (sr1 > sr2)
+ return (1);
+
+ return (0);
+}
+
+void
+space_map_ref_create(avl_tree_t *t)
+{
+ avl_create(t, space_map_ref_compare,
+ sizeof (space_ref_t), offsetof(space_ref_t, sr_node));
+}
+
+void
+space_map_ref_destroy(avl_tree_t *t)
+{
+ space_ref_t *sr;
+ void *cookie = NULL;
+
+ while ((sr = avl_destroy_nodes(t, &cookie)) != NULL)
+ kmem_free(sr, sizeof (*sr));
+
+ avl_destroy(t);
+}
+
+static void
+space_map_ref_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt)
+{
+ space_ref_t *sr;
+
+ sr = kmem_alloc(sizeof (*sr), KM_SLEEP);
+ sr->sr_offset = offset;
+ sr->sr_refcnt = refcnt;
+
+ avl_add(t, sr);
+}
+
+void
+space_map_ref_add_seg(avl_tree_t *t, uint64_t start, uint64_t end,
+ int64_t refcnt)
+{
+ space_map_ref_add_node(t, start, refcnt);
+ space_map_ref_add_node(t, end, -refcnt);
+}
+
+/*
+ * Convert (or add) a space map into a reference tree.
+ */
+void
+space_map_ref_add_map(avl_tree_t *t, space_map_t *sm, int64_t refcnt)
+{
+ space_seg_t *ss;
+
+ ASSERT(MUTEX_HELD(sm->sm_lock));
+
+ for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
+ space_map_ref_add_seg(t, ss->ss_start, ss->ss_end, refcnt);
+}
+
+/*
+ * Convert a reference tree into a space map. The space map will contain
+ * all members of the reference tree for which refcnt >= minref.
+ */
+void
+space_map_ref_generate_map(avl_tree_t *t, space_map_t *sm, int64_t minref)
+{
+ uint64_t start = -1ULL;
+ int64_t refcnt = 0;
+ space_ref_t *sr;
+
+ ASSERT(MUTEX_HELD(sm->sm_lock));
+
+ space_map_vacate(sm, NULL, NULL);
+
+ for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) {
+ refcnt += sr->sr_refcnt;
+ if (refcnt >= minref) {
+ if (start == -1ULL) {
+ start = sr->sr_offset;
+ }
+ } else {
+ if (start != -1ULL) {
+ uint64_t end = sr->sr_offset;
+ ASSERT(start <= end);
+ if (end > start)
+ space_map_add(sm, start, end - start);
+ start = -1ULL;
+ }
+ }
+ }
+ ASSERT(refcnt == 0);
+ ASSERT(start == -1ULL);
+}
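The reference-tree comment above is easiest to see with a tiny standalone model: record every segment as a pair of (offset, +refcnt) and (offset, -refcnt) events, sweep them in offset order, and emit the ranges where the running count is at least minref. The sketch below is illustrative plain C, not the ZFS implementation; it uses a sorted array where the real code uses an AVL tree:

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

typedef struct { uint64_t off; int64_t delta; } ref_event_t;

static int
ref_cmp(const void *a, const void *b)
{
	const ref_event_t *x = a, *y = b;
	return ((x->off > y->off) - (x->off < y->off));
}

/* Print the segments where the running refcount is >= minref. */
static void
ref_generate(ref_event_t *ev, int nev, int64_t minref)
{
	int64_t refcnt = 0;
	uint64_t start = 0;
	int in_seg = 0;

	qsort(ev, nev, sizeof (ref_event_t), ref_cmp);
	for (int i = 0; i < nev; i++) {
		refcnt += ev[i].delta;
		if (!in_seg && refcnt >= minref) {
			start = ev[i].off;
			in_seg = 1;
		} else if (in_seg && refcnt < minref) {
			printf("[%" PRIu64 ", %" PRIu64 ")\n", start, ev[i].off);
			in_seg = 0;
		}
	}
}

int
main(void)
{
	/* Two "space maps": A = [0,10), B = [5,15). */
	ref_event_t ev[] = {
		{ 0, +1 }, { 10, -1 },	/* map A */
		{ 5, +1 }, { 15, -1 },	/* map B */
	};

	ref_generate(ev, 4, 1);	/* union: prints [0, 15) */
	ref_generate(ev, 4, 2);	/* intersection: prints [5, 10) */
	return (0);
}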
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
index 0a39d19241ac..f52851d69f46 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
@@ -85,6 +85,8 @@ void *arc_data_buf_alloc(uint64_t space);
void arc_data_buf_free(void *buf, uint64_t space);
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
arc_buf_contents_t type);
+arc_buf_t *arc_loan_buf(spa_t *spa, int size);
+void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
index b27d89fe2162..7e2754d000b4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -262,6 +262,7 @@ void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
+void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_clear(dmu_buf_impl_t *db);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
index 7befe96bc323..08c30c8ed015 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
@@ -61,6 +61,7 @@ struct zbookmark;
struct spa;
struct nvlist;
struct objset_impl;
+struct arc_buf;
struct file;
typedef struct objset objset_t;
@@ -116,6 +117,8 @@ typedef enum dmu_object_type {
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
DMU_OT_NEXT_CLONES, /* ZAP */
DMU_OT_SCRUB_QUEUE, /* ZAP */
+ DMU_OT_USERGROUP_USED, /* ZAP */
+ DMU_OT_USERGROUP_QUOTA, /* ZAP */
DMU_OT_NUMTYPES
} dmu_object_type_t;
@@ -158,6 +161,9 @@ void zfs_znode_byteswap(void *buf, size_t size);
#define DMU_MAX_ACCESS (10<<20) /* 10MB */
#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
+#define DMU_USERUSED_OBJECT (-1ULL)
+#define DMU_GROUPUSED_OBJECT (-2ULL)
+
/*
* Public routines to create, destroy, open, and close objsets.
*/
@@ -173,7 +179,8 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type,
int dmu_objset_destroy(const char *name);
int dmu_snapshots_destroy(char *fsname, char *snapname);
int dmu_objset_rollback(objset_t *os);
-int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
+int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
+ boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
@@ -400,6 +407,11 @@ void *dmu_buf_get_user(dmu_buf_t *db);
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
/*
+ * Tells if the given dbuf is freeable.
+ */
+boolean_t dmu_buf_freeable(dmu_buf_t *);
+
+/*
* You must create a transaction, then hold the objects which you will
* (or might) modify as part of this transaction. Then you must assign
* the transaction to a transaction group. Once the transaction has
@@ -424,7 +436,7 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
-void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
+void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
@@ -447,8 +459,10 @@ int dmu_free_object(objset_t *os, uint64_t object);
* Canfail routines will return 0 on success, or an errno if there is a
* nonrecoverable I/O error.
*/
+#define DMU_READ_PREFETCH 0 /* prefetch */
+#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- void *buf);
+ void *buf, uint32_t flags);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
@@ -456,6 +470,10 @@ int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
+struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
+void dmu_return_arcbuf(struct arc_buf *buf);
+void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
+ dmu_tx_t *tx);
extern int zfs_prefetch_disable;
@@ -562,6 +580,12 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
int maxlen, boolean_t *conflict);
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
+
+typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
+ void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
+ dmu_tx_t *tx);
+extern void dmu_objset_register_type(dmu_objset_type_t ost,
+ objset_used_cb_t *cb);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
extern void *dmu_objset_get_user(objset_t *os);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
index 1d65727808c3..a8022d2eaa8f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
@@ -42,12 +42,20 @@ struct dsl_dataset;
struct dmu_tx;
struct objset_impl;
+#define OBJSET_PHYS_SIZE 2048
+#define OBJSET_OLD_PHYS_SIZE 1024
+
+#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
+
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
- char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
- sizeof (uint64_t)];
+ uint64_t os_flags;
+ char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
+ sizeof (zil_header_t) - sizeof (uint64_t)*2];
+ dnode_phys_t os_userused_dnode;
+ dnode_phys_t os_groupused_dnode;
} objset_phys_t;
struct objset {
@@ -62,6 +70,8 @@ typedef struct objset_impl {
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
dnode_t *os_meta_dnode;
+ dnode_t *os_userused_dnode;
+ dnode_t *os_groupused_dnode;
zilog_t *os_zil;
objset_t os;
uint8_t os_checksum; /* can change, under dsl_dir's locks */
@@ -74,6 +84,8 @@ typedef struct objset_impl {
struct dmu_tx *os_synctx; /* XXX sketchy */
blkptr_t *os_rootbp;
zil_header_t os_zil_header;
+ list_t os_synced_dnodes;
+ uint64_t os_flags;
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
@@ -92,6 +104,7 @@ typedef struct objset_impl {
} objset_impl_t;
#define DMU_META_DNODE_OBJECT 0
+#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
#define DMU_OS_IS_L2CACHEABLE(os) \
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
@@ -106,7 +119,8 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
int dmu_objset_rollback(objset_t *os);
-int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
+int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
+ boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
@@ -127,6 +141,10 @@ objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
objset_impl_t **osip);
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
+void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
+boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
+int dmu_objset_userspace_upgrade(objset_t *os);
+boolean_t dmu_objset_userspace_present(objset_t *os);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
index be9e56908321..48e4da8cd647 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
@@ -98,7 +98,8 @@ enum dnode_dirtycontext {
};
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
-#define DNODE_FLAG_USED_BYTES (1<<0)
+#define DNODE_FLAG_USED_BYTES (1<<0)
+#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
@@ -131,10 +132,7 @@ typedef struct dnode {
*/
krwlock_t dn_struct_rwlock;
- /*
- * Our link on dataset's dd_dnodes list.
- * Protected by dd_accounting_mtx.
- */
+ /* Our link on dn_objset->os_dnodes list; protected by os_lock. */
list_node_t dn_link;
/* immutable: */
@@ -191,6 +189,9 @@ typedef struct dnode {
/* parent IO for current sync write */
zio_t *dn_zio;
+ /* used in syncing context */
+ dnode_phys_t *dn_oldphys;
+
/* holds prefetch structure */
struct zfetch dn_zfetch;
} dnode_t;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
index 8665aec2dda8..a1c2896e3cfb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -195,7 +195,7 @@ void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
dmu_tx_t *tx);
-int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
+boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
index a29e44e67d0c..b064c9228ec8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DELEG_H
#define _SYS_DSL_DELEG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/zfs_context.h>
@@ -51,6 +49,10 @@ extern "C" {
#define ZFS_DELEG_PERM_ALLOW "allow"
#define ZFS_DELEG_PERM_USERPROP "userprop"
#define ZFS_DELEG_PERM_VSCAN "vscan"
+#define ZFS_DELEG_PERM_USERQUOTA "userquota"
+#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
+#define ZFS_DELEG_PERM_USERUSED "userused"
+#define ZFS_DELEG_PERM_GROUPUSED "groupused"
/*
* Note: the names of properties that are marked delegatable are also
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
index 86b9636ceaab..56d06388cc72 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -107,7 +107,6 @@ int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **);
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
-int dsl_dir_is_private(dsl_dir_t *dd);
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
const char *name, dmu_tx_t *tx);
dsl_checkfunc_t dsl_dir_destroy_check;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
index ef1b9044a0be..d8da295f3386 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
index d66caa86cff6..26018a46d1b2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
@@ -19,18 +19,17 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_PROP_H
#define _SYS_DSL_PROP_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/zfs_context.h>
+#include <sys/dsl_synctask.h>
#ifdef __cplusplus
extern "C" {
@@ -66,8 +65,10 @@ int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
+dsl_syncfunc_t dsl_props_set_sync;
int dsl_prop_set(const char *ddname, const char *propname,
int intsz, int numints, const void *buf);
+int dsl_props_set(const char *dsname, nvlist_t *nvl);
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
cred_t *cr, dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
index 1c9d89e8fd69..5d3e11c971f9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,6 +39,8 @@ extern "C" {
typedef struct metaslab_class metaslab_class_t;
typedef struct metaslab_group metaslab_group_t;
+extern space_map_ops_t *zfs_metaslab_ops;
+
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
uint64_t start, uint64_t size, uint64_t txg);
extern void metaslab_fini(metaslab_t *msp);
@@ -55,7 +57,7 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
-extern metaslab_class_t *metaslab_class_create(void);
+extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops);
extern void metaslab_class_destroy(metaslab_class_t *mc);
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
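
metaslab_class_create() now takes the block-picker ops vector instead of hard-coding one, and zfs_metaslab_ops exports the default picker. A sketch of the expected call site; that spa_activate() is the caller, and the spa_log_class field, are assumptions here:

	/* Sketch only: select the default picker when creating the classes. */
	spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops);	/* assumed field */
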
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
index 5980cbc843ac..d67dea7e975e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_METASLAB_IMPL_H
#define _SYS_METASLAB_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/metaslab.h>
#include <sys/space_map.h>
#include <sys/vdev.h>
@@ -41,6 +39,7 @@ extern "C" {
struct metaslab_class {
metaslab_group_t *mc_rotor;
uint64_t mc_allocated;
+ space_map_ops_t *mc_ops;
};
struct metaslab_group {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
index 1cfa7ecf6177..f54a5dc52f23 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -208,8 +208,8 @@ typedef struct blkptr {
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
- (BP_IS_HOLE(bp) ? 0 : \
- BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
+ BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
+
#define BP_SET_LSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
@@ -329,7 +329,7 @@ extern int spa_check_rootconf(char *devpath, char *devid,
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
extern int spa_import_rootpool(char *devpath, char *devid);
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
-extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
+extern int spa_import_verbatim(const char *, nvlist_t *, nvlist_t *);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
extern int spa_destroy(char *pool);
extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
@@ -352,9 +352,11 @@ extern void spa_inject_delref(spa_t *spa);
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
int replacing);
-extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
+extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
+ int replace_done);
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
+extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
@@ -476,6 +478,10 @@ extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
extern boolean_t spa_has_slogs(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
+extern boolean_t spa_writeable(spa_t *spa);
+extern int spa_mode(spa_t *spa);
+extern uint64_t zfs_strtonum(const char *str, char **nptr);
+#define strtonum(str, nptr) zfs_strtonum((str), (nptr))
/* history logging */
typedef enum history_log_type {
@@ -529,6 +535,7 @@ extern void spa_boot_init();
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
+extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
/* asynchronous event notification */
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
@@ -546,7 +553,7 @@ _NOTE(CONSTCOND) } while (0)
#define dprintf_bp(bp, fmt, ...)
#endif
-extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
+extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */
#ifdef __cplusplus
}
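
With spa_mode now a per-pool property (the old global survives as spa_mode_global), spa_writeable() gives callers a one-line predicate for refusing writes to a read-only import. A hypothetical guard, not taken from this patch:

	#include <sys/errno.h>

	static int
	example_require_writeable(spa_t *spa)	/* hypothetical helper */
	{
		if (!spa_writeable(spa))
			return (EROFS);		/* pool opened without FWRITE */
		return (0);
	}
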
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index 8aeb414fe9de..f3124b1ecc0d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -105,6 +105,7 @@ struct spa {
int spa_inject_ref; /* injection references */
uint8_t spa_sync_on; /* sync threads are running */
spa_load_state_t spa_load_state; /* current load operation */
+ boolean_t spa_load_verbatim; /* load the given config? */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
metaslab_class_t *spa_normal_class; /* normal data class */
@@ -141,9 +142,6 @@ struct spa {
int spa_async_suspended; /* async tasks suspended */
kcondvar_t spa_async_cv; /* wait for thread_exit() */
uint16_t spa_async_tasks; /* async task mask */
- kmutex_t spa_async_root_lock; /* protects async root count */
- uint64_t spa_async_root_count; /* number of async root zios */
- kcondvar_t spa_async_root_cv; /* notify when count == 0 */
char *spa_root; /* alternate root directory */
uint64_t spa_ena; /* spa-wide ereport ENA */
boolean_t spa_last_open_failed; /* true if last open failed */
@@ -163,13 +161,14 @@ struct spa {
uint64_t spa_failmode; /* failure mode for the pool */
uint64_t spa_delegation; /* delegation on/off */
list_t spa_config_list; /* previous cache file(s) */
+ zio_t *spa_async_zio_root; /* root of all async I/O */
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
kcondvar_t spa_suspend_cv; /* notification of resume */
uint8_t spa_suspended; /* pool is suspended */
- boolean_t spa_import_faulted; /* allow faulted vdevs */
boolean_t spa_is_root; /* pool is root */
int spa_minref; /* num refs when first opened */
+ int spa_mode; /* FREAD | FWRITE */
spa_log_state_t spa_log_state; /* log state */
/*
* spa_refcnt & spa_config_lock must be the last elements
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
index db9daef1f156..a682bbd409e8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPACE_MAP_H
#define _SYS_SPACE_MAP_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/avl.h>
#include <sys/dmu.h>
@@ -48,16 +46,24 @@ typedef struct space_map {
uint8_t sm_loading; /* map loading? */
kcondvar_t sm_load_cv; /* map load completion */
space_map_ops_t *sm_ops; /* space map block picker ops vector */
+ avl_tree_t *sm_pp_root; /* picker-private AVL tree */
void *sm_ppd; /* picker-private data */
kmutex_t *sm_lock; /* pointer to lock that protects map */
} space_map_t;
typedef struct space_seg {
avl_node_t ss_node; /* AVL node */
+ avl_node_t ss_pp_node; /* AVL picker-private node */
uint64_t ss_start; /* starting offset of this segment */
uint64_t ss_end; /* ending offset (non-inclusive) */
} space_seg_t;
+typedef struct space_ref {
+ avl_node_t sr_node; /* AVL node */
+ uint64_t sr_offset; /* offset (start or end) */
+ int64_t sr_refcnt; /* associated reference count */
+} space_ref_t;
+
typedef struct space_map_obj {
uint64_t smo_object; /* on-disk space map object */
uint64_t smo_objsize; /* size of the object */
@@ -70,6 +76,7 @@ struct space_map_ops {
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
+ uint64_t (*smop_max)(space_map_t *sm);
};
/*
@@ -133,13 +140,12 @@ extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
extern void space_map_destroy(space_map_t *sm);
extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
-extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
+extern boolean_t space_map_contains(space_map_t *sm,
+ uint64_t start, uint64_t size);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_walk(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
-extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
-extern void space_map_union(space_map_t *smd, space_map_t *sms);
extern void space_map_load_wait(space_map_t *sm);
extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
@@ -149,12 +155,22 @@ extern void space_map_unload(space_map_t *sm);
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
+extern uint64_t space_map_maxsize(space_map_t *sm);
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
extern void space_map_truncate(space_map_obj_t *smo,
objset_t *os, dmu_tx_t *tx);
+extern void space_map_ref_create(avl_tree_t *t);
+extern void space_map_ref_destroy(avl_tree_t *t);
+extern void space_map_ref_add_seg(avl_tree_t *t,
+ uint64_t start, uint64_t end, int64_t refcnt);
+extern void space_map_ref_add_map(avl_tree_t *t,
+ space_map_t *sm, int64_t refcnt);
+extern void space_map_ref_generate_map(avl_tree_t *t,
+ space_map_t *sm, int64_t minref);
+
#ifdef __cplusplus
}
#endif
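
The new space_map_ref_* interface builds a reference-counted segment tree from one or more space maps and then extracts the regions whose refcount meets a threshold; the rewritten DTL code in vdev.c is the consumer. A rough usage sketch (child_sm[], result_sm, children and the shared lock are assumed to exist and be held, since space_map_ref_generate_map() writes into result_sm):

	avl_tree_t t;
	int c;

	space_map_ref_create(&t);
	for (c = 0; c < children; c++)
		space_map_ref_add_map(&t, &child_sm[c], 1);
	/* Keep only the ranges present in at least two children. */
	space_map_ref_generate_map(&t, &result_sm, 2);
	space_map_ref_destroy(&t);
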
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
index 55a0dd5aec0d..b49df8ae0ce3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UBERBLOCK_IMPL_H
#define _SYS_UBERBLOCK_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/uberblock.h>
#ifdef __cplusplus
@@ -35,6 +33,11 @@ extern "C" {
#endif
/*
+ * For zdb use and debugging purposes only
+ */
+extern uint64_t ub_max_txg;
+
+/*
* The uberblock version is incremented whenever an incompatible on-disk
* format change is made to the SPA, DMU, or ZAP.
*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
index c070d6f3d623..b8313a920ddd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
@@ -36,6 +36,14 @@
extern "C" {
#endif
+typedef enum vdev_dtl_type {
+ DTL_MISSING, /* 0% replication: no copies of the data */
+ DTL_PARTIAL, /* less than 100% replication: some copies missing */
+ DTL_SCRUB, /* unable to fully repair during scrub/resilver */
+ DTL_OUTAGE, /* temporarily missing (used to attempt detach) */
+ DTL_TYPES
+} vdev_dtl_type_t;
+
extern boolean_t zfs_nocacheflush;
extern int vdev_open(vdev_t *);
@@ -50,10 +58,14 @@ extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
extern boolean_t vdev_is_bootable(vdev_t *vd);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
-extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
-extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
+extern void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t d,
+ uint64_t txg, uint64_t size);
+extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d,
+ uint64_t txg, uint64_t size);
+extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d);
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
int scrub_done);
+extern boolean_t vdev_dtl_required(vdev_t *vd);
extern boolean_t vdev_resilver_needed(vdev_t *vd,
uint64_t *minp, uint64_t *maxp);
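
Dirty time logs are now typed (missing, partial, scrub, outage) and addressed through the vdev itself rather than a raw space map. A hypothetical fragment of the kind of question the new API answers, in the spirit of the resilver path (the real policy lives in vdev_dtl_required() and the repair logic):

	/* Does child 'vd' lack a valid copy of a block born in 'txg'? */
	if (vdev_dtl_contains(vd, DTL_MISSING, txg, 1))
		issue_repair_write = B_TRUE;	/* hypothetical flag */
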
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
index 7e24edea7f38..1406d154d78b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -123,8 +123,7 @@ struct vdev {
vdev_t *vdev_parent; /* parent vdev */
vdev_t **vdev_child; /* array of children */
uint64_t vdev_children; /* number of children */
- space_map_t vdev_dtl_map; /* dirty time log in-core state */
- space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
+ space_map_t vdev_dtl[DTL_TYPES]; /* in-core dirty time logs */
vdev_stat_t vdev_stat; /* virtual device statistics */
/*
@@ -149,7 +148,7 @@ struct vdev {
* Leaf vdev state.
*/
uint64_t vdev_psize; /* physical device capacity */
- space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
+ space_map_obj_t vdev_dtl_smo; /* dirty time log space map obj */
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
uint64_t vdev_wholedisk; /* true if this is a whole disk */
uint64_t vdev_offline; /* persistent offline state */
@@ -160,6 +159,7 @@ struct vdev {
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
char *vdev_physpath; /* vdev device path (if any) */
+ char *vdev_fru; /* physical FRU location */
uint64_t vdev_not_present; /* not present during import */
uint64_t vdev_unspare; /* unspare when resilvering done */
hrtime_t vdev_last_try; /* last reopen time */
@@ -189,8 +189,9 @@ struct vdev {
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
};
-#define VDEV_SKIP_SIZE (8 << 10)
-#define VDEV_BOOT_HEADER_SIZE (8 << 10)
+#define VDEV_PAD_SIZE (8 << 10)
+/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
+#define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2
#define VDEV_PHYS_SIZE (112 << 10)
#define VDEV_UBERBLOCK_RING (128 << 10)
@@ -202,26 +203,14 @@ struct vdev {
offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
-/* ZFS boot block */
-#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
-#define VDEV_BOOT_VERSION 1 /* version number */
-
-typedef struct vdev_boot_header {
- uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
- uint64_t vb_version; /* VDEV_BOOT_VERSION */
- uint64_t vb_offset; /* start offset (bytes) */
- uint64_t vb_size; /* size (bytes) */
- char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
-} vdev_boot_header_t;
-
typedef struct vdev_phys {
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
zio_block_tail_t vp_zbt;
} vdev_phys_t;
typedef struct vdev_label {
- char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
- vdev_boot_header_t vl_boot_header; /* 8K */
+ char vl_pad1[VDEV_PAD_SIZE]; /* 8K */
+ char vl_pad2[VDEV_PAD_SIZE]; /* 8K */
vdev_phys_t vl_vdev_phys; /* 112K */
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
} vdev_label_t; /* 256K total */
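
The unused boot header is dropped and replaced by a second 8K pad, so the on-disk label is unchanged in size: 8K + 8K + 112K + 128K = 256K. A compile-time sanity check in the style of the kernel's CTASSERT (a sketch, not part of this patch):

	CTASSERT(VDEV_SKIP_SIZE + VDEV_PHYS_SIZE + VDEV_UBERBLOCK_RING ==
	    sizeof (vdev_label_t));	/* 16K + 112K + 128K == 256K */
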
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
index f88cc068bd57..ea3a0f632055 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
@@ -186,6 +186,9 @@ int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
matchtype_t mt, char *realname, int rn_len,
boolean_t *normalization_conflictp);
+int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
+ int add, uint64_t *towrite, uint64_t *tooverwrite);
+
/*
* Create an attribute with the given name and value.
*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
index 0dc02ab6b0ac..c86bb16de268 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_IMPL_H
#define _SYS_ZAP_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zap.h>
#include <sys/zfs_context.h>
#include <sys/avl.h>
@@ -195,6 +193,8 @@ int fzap_count(zap_t *zap, uint64_t *count);
int fzap_lookup(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers, void *buf,
char *realname, int rn_len, boolean_t *normalization_conflictp);
+int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
+ uint64_t *tooverwrite);
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
int fzap_update(zap_name_t *zn,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
index f87823c5d0fe..3607e1f3c937 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -113,8 +113,6 @@ typedef struct zfs_acl_phys {
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
} zfs_acl_phys_t;
-
-
typedef struct acl_ops {
uint32_t (*ace_mask_get) (void *acep); /* get access mask */
void (*ace_mask_set) (void *acep,
@@ -160,12 +158,21 @@ typedef struct zfs_acl {
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
list_t z_acl; /* chunks of ACE data */
acl_ops_t z_ops; /* ACL operations */
- boolean_t z_has_fuids; /* FUIDs present in ACL? */
} zfs_acl_t;
#define ACL_DATA_ALLOCED 0x1
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
+struct zfs_fuid_info;
+
+typedef struct zfs_acl_ids {
+ uint64_t z_fuid; /* file owner fuid */
+ uint64_t z_fgid; /* file group owner fuid */
+ uint64_t z_mode; /* mode to set on create */
+ zfs_acl_t *z_aclp; /* ACL to create with file */
+ struct zfs_fuid_info *z_fuidp; /* for tracking fuids for log */
+} zfs_acl_ids_t;
+
/*
* Property values for acl_mode and acl_inherit.
*
@@ -182,11 +189,12 @@ typedef struct zfs_acl {
struct znode;
struct zfsvfs;
-struct zfs_fuid_info;
#ifdef _KERNEL
-void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
- dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
+int zfs_acl_ids_create(struct znode *, int, vattr_t *,
+ cred_t *, vsecattr_t *, zfs_acl_ids_t *);
+void zfs_acl_ids_free(zfs_acl_ids_t *);
+boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *);
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
void zfs_acl_rele(void *);
@@ -201,9 +209,9 @@ int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
struct znode *, struct znode *, cred_t *cr);
void zfs_acl_free(zfs_acl_t *);
-int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
-int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
- struct zfs_fuid_info **, dmu_tx_t *);
+int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, cred_t *,
+ struct zfs_fuid_info **, zfs_acl_t **);
+int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *);
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
index 0dd8f4f5c503..952bb24a4567 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
@@ -134,4 +134,6 @@ extern struct mtx zfs_debug_mtx;
} \
} while (0)
+#define sys_shutdown rebooting
+
#endif /* _SYS_ZFS_CONTEXT_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
index 905e8dd2c0e3..25348d6460f9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_CTLDIR_H
#define _ZFS_CTLDIR_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/vnode.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
@@ -63,6 +61,7 @@ int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
#define ZFSCTL_INO_ROOT 0x1
#define ZFSCTL_INO_SNAPDIR 0x2
+#define ZFSCTL_INO_SHARES 0x3
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
index 0dbb3c52136b..bd2c938515ff 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
@@ -49,7 +49,6 @@ extern "C" {
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
#define IS_XATTR 0x02 /* create an extended attribute node */
-#define IS_REPLAY 0x04 /* we are replaying intent log */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);
@@ -60,7 +59,7 @@ extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
pathname_t *);
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
- uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
+ uint_t, znode_t **, int, zfs_acl_ids_t *);
extern void zfs_rmnode(znode_t *);
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
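
Taken together with the zfs_acl.h changes above, file creation is now split into an ACL/FUID preparation step, a quota check, the node creation itself, and a release step; zfs_mknode() takes the prepared zfs_acl_ids_t instead of a raw ACL and FUID info pointer. A sketch of the ordering with transaction setup and error paths elided (dzp, vap, cr, tx and zfsvfs are the usual create-path locals, assumed here):

	zfs_acl_ids_t acl_ids;
	znode_t *zp;
	int error;

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)) != 0)
		return (error);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		return (EDQUOT);
	}
	/* ... dmu_tx_hold_*()s and dmu_tx_assign() go here ... */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
	zfs_acl_ids_free(&acl_ids);
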
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h
index 8d73b41938df..c035707c62a6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_fuid.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_FUID_H
#define _SYS_FS_ZFS_FUID_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#ifdef _KERNEL
#include <sys/kidmap.h>
@@ -51,11 +49,11 @@ typedef enum {
* Estimate space needed for one more fuid table entry.
* for now assume its current size + 1K
*/
-#define FUID_SIZE_ESTIMATE(z) (z->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
+#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
-#define FUID_INDEX(x) (x >> 32)
-#define FUID_RID(x) (x & 0xffffffff)
-#define FUID_ENCODE(idx, rid) ((idx << 32) | rid)
+#define FUID_INDEX(x) ((x) >> 32)
+#define FUID_RID(x) ((x) & 0xffffffff)
+#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
/*
* FUIDs cause problems for the intent log
* we need to replay the creation of the FUID,
@@ -104,17 +102,23 @@ struct znode;
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
extern void zfs_fuid_destroy(zfsvfs_t *);
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
- dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
+ cred_t *, zfs_fuid_info_t **);
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
- dmu_tx_t *, zfs_fuid_info_t **);
-extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
- uid_t *gid);
+ zfs_fuid_info_t **);
+extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr,
+ uid_t *uid, uid_t *gid);
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
-extern void zfs_fuid_info_free();
+extern void zfs_fuid_info_free(zfs_fuid_info_t *);
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
+void zfs_fuid_sync(zfsvfs_t *, dmu_tx_t *);
+extern int zfs_fuid_find_by_domain(zfsvfs_t *, const char *domain,
+ char **retdomain, boolean_t addok);
+extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx);
+extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
#endif
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
+void zfs_fuid_avl_tree_create(avl_tree_t *, avl_tree_t *);
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
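
The FUID macros now parenthesize their arguments, and FUID_ENCODE additionally casts the index to uint64_t before shifting; without that cast an int-typed index would be shifted by its full width, which C leaves undefined. A worked example (values chosen arbitrarily):

	/* idx = 3, rid = 1001:
	 *   ((uint64_t)3 << 32) | 1001 == 0x00000003000003e9
	 */
	uint64_t fuid = FUID_ENCODE(3, 1001);
	ASSERT(FUID_INDEX(fuid) == 3);
	ASSERT(FUID_RID(fuid) == 1001);
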
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
index 05a21c846ee8..15a4a76c2545 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -169,6 +169,13 @@ typedef struct zfs_cmd {
zinject_record_t zc_inject_record;
} zfs_cmd_t;
+typedef struct zfs_useracct {
+ char zu_domain[256];
+ uid_t zu_rid;
+ uint32_t zu_pad;
+ uint64_t zu_space;
+} zfs_useracct_t;
+
#define ZVOL_MAX_MINOR (1 << 16)
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
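
zfs_useracct_t is the fixed-size record that the new user/group space-accounting ioctl hands back in bulk; consumers simply walk an array of them. A user-space sketch (how the buffer is obtained from the ioctl is omitted, and the helper name is hypothetical):

	#include <stdio.h>
	#include <stddef.h>

	static void
	print_useracct(const zfs_useracct_t *buf, size_t bufsize)
	{
		size_t i, n = bufsize / sizeof (zfs_useracct_t);

		for (i = 0; i < n; i++)
			(void) printf("%s/%u\t%llu bytes\n", buf[i].zu_domain,
			    (unsigned)buf[i].zu_rid,
			    (unsigned long long)buf[i].zu_space);
	}
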
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
index 8d53c02b77aa..163a8000248b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_VFSOPS_H
#define _SYS_FS_ZFS_VFSOPS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
@@ -47,13 +45,13 @@ struct zfsvfs {
uint64_t z_root; /* id of root znode */
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
uint64_t z_max_blksz; /* maximum block size for files */
- uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
uint64_t z_fuid_obj; /* fuid table object number */
uint64_t z_fuid_size; /* fuid table size */
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
krwlock_t z_fuid_lock; /* fuid lock */
boolean_t z_fuid_loaded; /* fuid tables are loaded */
+ boolean_t z_fuid_dirty; /* need to sync fuid table ? */
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
zilog_t *z_log; /* intent log pointer */
uint_t z_acl_mode; /* acl chmod/mode behavior */
@@ -72,8 +70,13 @@ struct zfsvfs {
boolean_t z_issnap; /* true if this is a snapshot */
boolean_t z_vscan; /* virus scan on/off */
boolean_t z_use_fuids; /* version allows fuids */
- kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
+ boolean_t z_replay; /* set during ZIL replay */
+ kmutex_t z_online_recv_lock; /* held while recv in progress */
uint64_t z_version; /* ZPL version */
+ uint64_t z_shares_dir; /* hidden shares dir */
+ kmutex_t z_lock;
+ uint64_t z_userquota_obj;
+ uint64_t z_groupquota_obj;
#define ZFS_OBJ_MTX_SZ 64
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
};
@@ -131,6 +134,17 @@ extern int zfs_super_owner;
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
+extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+ const char *domain, uint64_t rid, uint64_t *valuep);
+extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+ uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
+extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+ const char *domain, uint64_t rid, uint64_t quota);
+extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
+ boolean_t isgroup, uint64_t fuid);
+extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
+extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp);
+extern void zfsvfs_free(zfsvfs_t *zfsvfs);
#ifdef __cplusplus
}
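
The new z_userquota_obj/z_groupquota_obj objects and the zfs_userspace_*/zfs_set_userquota/zfs_usergroup_overquota entry points implement the userquota@ and groupquota@ properties; write paths consult the overquota check and fail with EDQUOT. A hypothetical fragment (the zp->z_phys uid/gid fields are assumed from the pre-SA znode layout):

	if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) ||
	    zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid))
		return (EDQUOT);
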
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
index f91bc9027f7f..47072fb3bfd0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -100,6 +100,7 @@ extern "C" {
#define ZFS_ROOT_OBJ "ROOT"
#define ZPL_VERSION_STR "VERSION"
#define ZFS_FUID_TABLES "FUID"
+#define ZFS_SHARES_DIR "SHARES"
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
@@ -186,7 +187,6 @@ typedef struct znode {
vnode_t *z_vnode;
uint64_t z_id; /* object ID for this znode */
kmutex_t z_lock; /* znode modification lock */
- krwlock_t z_map_lock; /* page map lock */
krwlock_t z_parent_lock; /* parent lock for directories */
krwlock_t z_name_lock; /* "master" lock for dirent locks */
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
@@ -338,7 +338,6 @@ extern void zfs_remove_op_tables();
extern int zfs_create_op_tables();
extern dev_t zfs_cmpldev(uint64_t);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
-extern int zfs_set_version(const char *name, uint64_t newvers);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern void zfs_znode_dmu_fini(znode_t *);
@@ -367,6 +366,7 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
#endif
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern zil_get_data_t zfs_get_data;
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
index 5212aafceae3..e992f6ac4aca 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
@@ -139,7 +139,8 @@ typedef enum zil_create {
#define TX_MKDIR_ACL 17 /* mkdir with ACL */
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
-#define TX_MAX_TYPE 20 /* Max transaction type */
+#define TX_WRITE2 20 /* dmu_sync EALREADY write */
+#define TX_MAX_TYPE 21 /* Max transaction type */
/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
@@ -341,7 +342,6 @@ typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
uint64_t txg);
typedef int zil_replay_func_t();
-typedef void zil_replay_cleaner_t();
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
@@ -356,9 +356,8 @@ extern void zil_free(zilog_t *zilog);
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
extern void zil_close(zilog_t *zilog);
-extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
- zil_replay_func_t *replay_func[TX_MAX_TYPE],
- zil_replay_cleaner_t *replay_cleaner);
+extern void zil_replay(objset_t *os, void *arg,
+ zil_replay_func_t *replay_func[TX_MAX_TYPE]);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
@@ -378,6 +377,7 @@ extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
+extern void zil_get_replay_data(zilog_t *zilog, lr_write_t *lr);
extern int zil_disable;
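
zil_replay() loses the txg pointer and the cleaner callback; replay progress is now tracked in the zilog itself (zl_replaying_seq/zl_replay in zil_impl.h below) and in the filesystem (z_replay in zfs_vfsops.h above). A sketch of the simplified mount-time call, reusing zfs_replay_vector from zfs_znode.h above; zfsvfs->z_os is assumed from the unchanged part of the struct:

	zfsvfs->z_replay = B_TRUE;
	zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
	zfsvfs->z_replay = B_FALSE;
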
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
index 0fc800b96dea..3f2582931d15 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIL_IMPL_H
#define _SYS_ZIL_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zil.h>
#include <sys/dmu_objset.h>
@@ -74,13 +72,14 @@ struct zilog {
uint64_t zl_commit_seq; /* committed upto this number */
uint64_t zl_lr_seq; /* log record sequence number */
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
- uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
+ uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */
+ uint64_t zl_replaying_seq; /* current replay seq number */
uint32_t zl_suspend; /* log suspend count */
kcondvar_t zl_cv_writer; /* log writer thread completion */
kcondvar_t zl_cv_suspend; /* log suspend completion */
uint8_t zl_suspending; /* log is currently suspending */
uint8_t zl_keep_first; /* keep first log block in destroy */
- uint8_t zl_stop_replay; /* don't replay any further */
+ uint8_t zl_replay; /* replaying records while set */
uint8_t zl_stop_sync; /* for debugging */
uint8_t zl_writer; /* boolean: write setup in progress */
uint8_t zl_log_error; /* boolean: log write error */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index 8c8efcdefbbb..d7c0febdfc72 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -132,12 +132,15 @@ enum zio_compress {
#define ZIO_FLAG_IO_RETRY 0x00400
#define ZIO_FLAG_IO_REWRITE 0x00800
-#define ZIO_FLAG_PROBE 0x01000
+#define ZIO_FLAG_SELF_HEAL 0x01000
#define ZIO_FLAG_RESILVER 0x02000
#define ZIO_FLAG_SCRUB 0x04000
#define ZIO_FLAG_SCRUB_THREAD 0x08000
-#define ZIO_FLAG_GANG_CHILD 0x10000
+#define ZIO_FLAG_PROBE 0x10000
+#define ZIO_FLAG_GANG_CHILD 0x20000
+#define ZIO_FLAG_RAW 0x40000
+#define ZIO_FLAG_GODFATHER 0x80000
#define ZIO_FLAG_GANG_INHERIT \
(ZIO_FLAG_CANFAIL | \
@@ -146,6 +149,7 @@ enum zio_compress {
ZIO_FLAG_DONT_RETRY | \
ZIO_FLAG_DONT_CACHE | \
ZIO_FLAG_DONT_AGGREGATE | \
+ ZIO_FLAG_SELF_HEAL | \
ZIO_FLAG_RESILVER | \
ZIO_FLAG_SCRUB | \
ZIO_FLAG_SCRUB_THREAD)
@@ -156,6 +160,14 @@ enum zio_compress {
ZIO_FLAG_IO_RETRY | \
ZIO_FLAG_PROBE)
+#define ZIO_FLAG_AGG_INHERIT \
+ (ZIO_FLAG_DONT_AGGREGATE | \
+ ZIO_FLAG_IO_REPAIR | \
+ ZIO_FLAG_SELF_HEAL | \
+ ZIO_FLAG_RESILVER | \
+ ZIO_FLAG_SCRUB | \
+ ZIO_FLAG_SCRUB_THREAD)
+
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
@@ -254,6 +266,13 @@ typedef int zio_pipe_stage_t(zio_t *zio);
#define ZIO_REEXECUTE_NOW 0x01
#define ZIO_REEXECUTE_SUSPEND 0x02
+typedef struct zio_link {
+ zio_t *zl_parent;
+ zio_t *zl_child;
+ list_node_t zl_parent_node;
+ list_node_t zl_child_node;
+} zio_link_t;
+
struct zio {
/* Core information about this I/O */
zbookmark_t io_bookmark;
@@ -263,15 +282,14 @@ struct zio {
int io_cmd;
uint8_t io_priority;
uint8_t io_reexecute;
- uint8_t io_async_root;
+ uint8_t io_state[ZIO_WAIT_TYPES];
uint64_t io_txg;
spa_t *io_spa;
blkptr_t *io_bp;
blkptr_t io_bp_copy;
- zio_t *io_parent;
- zio_t *io_child;
- zio_t *io_sibling_prev;
- zio_t *io_sibling_next;
+ list_t io_parent_list;
+ list_t io_child_list;
+ zio_link_t *io_walk_link;
zio_t *io_logical;
zio_transform_t *io_transform_stack;
@@ -294,8 +312,6 @@ struct zio {
avl_node_t io_offset_node;
avl_node_t io_deadline_node;
avl_tree_t *io_vdev_tree;
- zio_t *io_delegate_list;
- zio_t *io_delegate_next;
/* Internal pipeline state */
int io_flags;
@@ -308,6 +324,7 @@ struct zio {
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
uint64_t *io_stall;
+ zio_t *io_gang_leader;
zio_gang_node_t *io_gang_tree;
void *io_executor;
void *io_waiter;
@@ -323,7 +340,7 @@ struct zio {
#endif
};
-extern zio_t *zio_null(zio_t *pio, spa_t *spa,
+extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_root(spa_t *spa,
@@ -371,6 +388,11 @@ extern void zio_nowait(zio_t *zio);
extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
+extern zio_t *zio_walk_parents(zio_t *cio);
+extern zio_t *zio_walk_children(zio_t *pio);
+extern zio_t *zio_unique_parent(zio_t *cio);
+extern void zio_add_child(zio_t *pio, zio_t *cio);
+
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
@@ -397,7 +419,7 @@ extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
extern void zio_suspend(spa_t *spa, zio_t *zio);
-extern void zio_resume(spa_t *spa);
+extern int zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
/*
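
The single io_parent/io_child pointers and sibling links are replaced by parent and child lists joined by zio_link_t, so a zio can have many of each; zio_walk_parents()/zio_walk_children() keep their cursor in io_walk_link and are called repeatedly until they return NULL. A hypothetical iteration over the parents of a child zio 'cio':

	zio_t *pio;
	int parents = 0;

	while ((pio = zio_walk_parents(cio)) != NULL)
		parents++;	/* e.g. propagate state or errors to each pio */
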
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index daab40908458..befc8b36bc3f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -327,8 +327,10 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&vd->