aboutsummaryrefslogtreecommitdiff
path: root/cddl
diff options
context:
space:
mode:
authorPawel Jakub Dawidek <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
committerPawel Jakub Dawidek <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
commit10b9d77bf1ccf2f3affafa6261692cb92cf7e992 (patch)
treeef515cadc08bf427e4d3f1360199ec9827b1596b /cddl
parente02dd14a548a89bee6657d9eacb0f992bf61b280 (diff)
downloadsrc-10b9d77bf1ccf2f3affafa6261692cb92cf7e992.tar.gz
src-10b9d77bf1ccf2f3affafa6261692cb92cf7e992.zip
Finally... Import the latest open-source ZFS version - (SPA) 28.
Few new things available from now on: - Data deduplication. - Triple parity RAIDZ (RAIDZ3). - zfs diff. - zpool split. - Snapshot holds. - zpool import -F. Allows to rewind corrupted pool to earlier transaction group. - Possibility to import pool in read-only mode. MFC after: 1 month
Notes
Notes: svn path=/head/; revision=219089
Diffstat (limited to 'cddl')
-rw-r--r--cddl/compat/opensolaris/include/fcntl.h3
-rw-r--r--cddl/compat/opensolaris/include/mnttab.h4
-rw-r--r--cddl/compat/opensolaris/include/priv.h2
-rw-r--r--cddl/compat/opensolaris/include/sha2.h38
-rw-r--r--cddl/compat/opensolaris/include/solaris.h6
-rw-r--r--cddl/compat/opensolaris/include/thread_pool.h39
-rw-r--r--cddl/compat/opensolaris/misc/fsshare.c14
-rw-r--r--cddl/compat/opensolaris/misc/zmount.c5
-rw-r--r--cddl/contrib/opensolaris/cmd/stat/common/statcommon.h50
-rw-r--r--cddl/contrib/opensolaris/cmd/stat/common/timestamp.c49
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb.c2241
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb_il.c131
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs.8580
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c29
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h1
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_main.c3667
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_util.h6
-rw-r--r--cddl/contrib/opensolaris/cmd/zinject/translate.c55
-rw-r--r--cddl/contrib/opensolaris/cmd/zinject/zinject.c255
-rw-r--r--cddl/contrib/opensolaris/cmd/zinject/zinject.h9
-rw-r--r--cddl/contrib/opensolaris/cmd/zlook/zlook.c411
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool.8684
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_main.c977
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_util.c20
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_util.h8
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c471
-rw-r--r--cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.167
-rw-r--r--cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c429
-rw-r--r--cddl/contrib/opensolaris/cmd/ztest/ztest.c4851
-rw-r--r--cddl/contrib/opensolaris/head/synch.h27
-rw-r--r--cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c783
-rw-r--r--cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h160
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h13
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c41
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c33
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c56
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h211
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c94
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c22
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c1503
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c832
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c452
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h53
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c598
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c354
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c1533
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c1346
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c99
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c185
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/kernel.c128
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h109
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/taskq.c49
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/util.c5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/allow.py16
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py37
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/holds.py75
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c117
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/table.py70
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py77
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/util.py13
-rw-r--r--cddl/lib/libzfs/Makefile29
-rw-r--r--cddl/lib/libzpool/Makefile18
-rw-r--r--cddl/sbin/zfs/Makefile8
-rw-r--r--cddl/sbin/zpool/Makefile15
-rw-r--r--cddl/usr.bin/Makefile4
-rw-r--r--cddl/usr.bin/zlook/Makefile25
-rw-r--r--cddl/usr.bin/zstreamdump/Makefile27
-rw-r--r--cddl/usr.bin/ztest/Makefile1
-rw-r--r--cddl/usr.sbin/zdb/Makefile1
72 files changed, 18054 insertions, 6282 deletions
diff --git a/cddl/compat/opensolaris/include/fcntl.h b/cddl/compat/opensolaris/include/fcntl.h
index 9b6c3f9ee62d..548918aaab3a 100644
--- a/cddl/compat/opensolaris/include/fcntl.h
+++ b/cddl/compat/opensolaris/include/fcntl.h
@@ -32,6 +32,7 @@
#include_next <fcntl.h>
-#define open64 open
+#define open64(...) open(__VA_ARGS__)
+#define openat64(...) openat(__VA_ARGS__)
#endif
diff --git a/cddl/compat/opensolaris/include/mnttab.h b/cddl/compat/opensolaris/include/mnttab.h
index a18dd8d1893b..227196a4017f 100644
--- a/cddl/compat/opensolaris/include/mnttab.h
+++ b/cddl/compat/opensolaris/include/mnttab.h
@@ -12,6 +12,10 @@
#define MNTTAB _PATH_DEVZERO
#define MNT_LINE_MAX 1024
+#define MS_OVERLAY 0x0
+#define MS_NOMNTTAB 0x0
+#define MS_RDONLY 0x1
+
#define umount2(p, f) unmount(p, f)
struct mnttab {
diff --git a/cddl/compat/opensolaris/include/priv.h b/cddl/compat/opensolaris/include/priv.h
index 32696ae5668b..2fee5b0d40c8 100644
--- a/cddl/compat/opensolaris/include/priv.h
+++ b/cddl/compat/opensolaris/include/priv.h
@@ -10,7 +10,7 @@
#define PRIV_SYS_CONFIG 0
static __inline int
-priv_ineffect(priv)
+priv_ineffect(int priv)
{
assert(priv == PRIV_SYS_CONFIG);
diff --git a/cddl/compat/opensolaris/include/sha2.h b/cddl/compat/opensolaris/include/sha2.h
new file mode 100644
index 000000000000..488f2dbd8b47
--- /dev/null
+++ b/cddl/compat/opensolaris/include/sha2.h
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SHA2_H_
+#define _OPENSOLARIS_SHA2_H_
+
+#include_next <sha256.h>
+
+#define SHA256Init(c) SHA256_Init(c)
+#define SHA256Update(c, d, s) SHA256_Update((c), (d), (s))
+#define SHA256Final(b, c) SHA256_Final((unsigned char *)(b), (c))
+
+#endif /* !_OPENSOLARIS_SHA2_H_ */
diff --git a/cddl/compat/opensolaris/include/solaris.h b/cddl/compat/opensolaris/include/solaris.h
index 01f9d4776abc..9bead018bcec 100644
--- a/cddl/compat/opensolaris/include/solaris.h
+++ b/cddl/compat/opensolaris/include/solaris.h
@@ -5,6 +5,10 @@
#include <sys/ccompile.h>
-#define dirent64 dirent
+#include <fcntl.h>
+
+#define NOTE(s)
+
+int mkdirp(const char *, mode_t);
#endif /* !_SOLARIS_H_ */
diff --git a/cddl/compat/opensolaris/include/thread_pool.h b/cddl/compat/opensolaris/include/thread_pool.h
new file mode 100644
index 000000000000..25ac55dedea7
--- /dev/null
+++ b/cddl/compat/opensolaris/include/thread_pool.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_THREAD_POOL_H_
+#define _OPENSOLARIS_THREAD_POOL_H_
+
+typedef int tpool_t;
+
+#define tpool_create(a, b, c, d) (0)
+#define tpool_dispatch(pool, func, arg) func(arg)
+#define tpool_wait(pool) do { } while (0)
+#define tpool_destroy(pool) do { } while (0)
+
+#endif /* !_OPENSOLARIS_THREAD_POOL_H_ */
diff --git a/cddl/compat/opensolaris/misc/fsshare.c b/cddl/compat/opensolaris/misc/fsshare.c
index 10ed591d5bca..e8faa928d6fb 100644
--- a/cddl/compat/opensolaris/misc/fsshare.c
+++ b/cddl/compat/opensolaris/misc/fsshare.c
@@ -28,15 +28,17 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
+
+#include <assert.h>
#include <errno.h>
+#include <fcntl.h>
+#include <fsshare.h>
#include <libutil.h>
-#include <assert.h>
#include <pathnames.h> /* _PATH_MOUNTDPID */
-#include <fsshare.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
#define FILE_HEADER "# !!! DO NOT EDIT THIS FILE MANUALLY !!!\n\n"
#define OPTSSIZE 1024
diff --git a/cddl/compat/opensolaris/misc/zmount.c b/cddl/compat/opensolaris/misc/zmount.c
index 493a4fc4ef12..b4f99e3be9fd 100644
--- a/cddl/compat/opensolaris/misc/zmount.c
+++ b/cddl/compat/opensolaris/misc/zmount.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <mnttab.h>
static void
build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
@@ -78,7 +79,7 @@ zmount(const char *spec, const char *dir, int mflag, char *fstype,
assert(spec != NULL);
assert(dir != NULL);
- assert(mflag == 0);
+ assert(mflag == 0 || mflag == MS_RDONLY);
assert(fstype != NULL);
assert(strcmp(fstype, MNTTYPE_ZFS) == 0);
assert(dataptr == NULL);
@@ -91,6 +92,8 @@ zmount(const char *spec, const char *dir, int mflag, char *fstype,
iov = NULL;
iovlen = 0;
+ if (mflag & MS_RDONLY)
+ build_iovec(&iov, &iovlen, "ro", NULL, 0);
build_iovec(&iov, &iovlen, "fstype", fstype, (size_t)-1);
build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir),
(size_t)-1);
diff --git a/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h b/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h
new file mode 100644
index 000000000000..f82495f22b5f
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Common routines for acquiring snapshots of kstats for
+ * iostat, mpstat, and vmstat.
+ */
+
+#ifndef _STATCOMMON_H
+#define _STATCOMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <time.h>
+
+#define NODATE 0 /* Default: No time stamp */
+#define DDATE 1 /* Standard date format */
+#define UDATE 2 /* Internal representation of Unix time */
+
+/* Print a timestamp in either Unix or standard format. */
+void print_timestamp(uint_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STATCOMMON_H */
diff --git a/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c b/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c
new file mode 100644
index 000000000000..be7b30c29fd0
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c
@@ -0,0 +1,49 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "statcommon.h"
+
+#include <langinfo.h>
+
+/*
+ * Print timestamp as decimal reprentation of time_t value (-T u was specified)
+ * or in date(1) format (-T d was specified).
+ */
+void
+print_timestamp(uint_t timestamp_fmt)
+{
+ time_t t = time(NULL);
+
+ if (timestamp_fmt == UDATE) {
+ (void) printf("%ld\n", t);
+ } else if (timestamp_fmt == DDATE) {
+ char dstr[64];
+ int len;
+
+ len = strftime(dstr, sizeof (dstr), "%+", localtime(&t));
+ if (len > 0)
+ (void) printf("%s\n", dstr);
+ }
+}
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index 915ea1917a0e..c6e219df9e1d 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <stdio.h>
@@ -34,6 +33,9 @@
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
+#include <sys/zfs_sa.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
@@ -51,10 +53,25 @@
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
+#include <sys/ddt.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
+#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
+ zio_compress_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
+ zio_checksum_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
+ dmu_ot[(idx)].ot_name : "UNKNOWN")
+#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
+
+#ifndef lint
+extern int zfs_recover;
+#else
+int zfs_recover;
+#endif
+
const char cmdname[] = "zdb";
uint8_t dump_opt[256];
@@ -64,8 +81,6 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
-boolean_t zdb_sig_user_data = B_TRUE;
-int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -87,39 +102,56 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage: %s [-udibcsvL] [-U cachefile_path] [-t txg]\n"
- "\t [-S user:cksumalg] "
- "dataset [object...]\n"
- " %s -C [pool]\n"
- " %s -l dev\n"
- " %s -R pool:vdev:offset:size:flags\n"
- " %s [-p path_to_vdev_dir]\n"
- " %s -e pool | GUID | devid ...\n",
- cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
-
- (void) fprintf(stderr, " -u uberblock\n");
- (void) fprintf(stderr, " -d datasets\n");
- (void) fprintf(stderr, " -C cached pool configuration\n");
- (void) fprintf(stderr, " -i intent logs\n");
- (void) fprintf(stderr, " -b block statistics\n");
- (void) fprintf(stderr, " -m metaslabs\n");
- (void) fprintf(stderr, " -c checksum all metadata (twice for "
+ "Usage: %s [-CumdibcsDvhL] poolname [object...]\n"
+ " %s [-div] dataset [object...]\n"
+ " %s -m [-L] poolname [vdev [metaslab...]]\n"
+ " %s -R poolname vdev:offset:size[:flags]\n"
+ " %s -S poolname\n"
+ " %s -l [-u] device\n"
+ " %s -C\n\n",
+ cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
+
+ (void) fprintf(stderr, " Dataset name must include at least one "
+ "separator character '/' or '@'\n");
+ (void) fprintf(stderr, " If dataset name is specified, only that "
+ "dataset is dumped\n");
+ (void) fprintf(stderr, " If object numbers are specified, only "
+ "those objects are dumped\n\n");
+ (void) fprintf(stderr, " Options to control amount of output:\n");
+ (void) fprintf(stderr, " -u uberblock\n");
+ (void) fprintf(stderr, " -d dataset(s)\n");
+ (void) fprintf(stderr, " -i intent logs\n");
+ (void) fprintf(stderr, " -C config (or cachefile if alone)\n");
+ (void) fprintf(stderr, " -h pool history\n");
+ (void) fprintf(stderr, " -b block statistics\n");
+ (void) fprintf(stderr, " -m metaslabs\n");
+ (void) fprintf(stderr, " -c checksum all metadata (twice for "
"all data) blocks\n");
- (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
- (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
- "dump blkptr signatures\n");
- (void) fprintf(stderr, " -v verbose (applies to all others)\n");
+ (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
+ (void) fprintf(stderr, " -D dedup statistics\n");
+ (void) fprintf(stderr, " -S simulate dedup to measure effect\n");
+ (void) fprintf(stderr, " -v verbose (applies to all others)\n");
(void) fprintf(stderr, " -l dump label contents\n");
(void) fprintf(stderr, " -L disable leak tracking (do not "
"load spacemaps)\n");
- (void) fprintf(stderr, " -U cachefile_path -- use alternate "
- "cachefile\n");
(void) fprintf(stderr, " -R read and display block from a "
- "device\n");
- (void) fprintf(stderr, " -e Pool is exported/destroyed/"
- "has altroot\n");
- (void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
- (void) fprintf(stderr, " -t <txg> highest txg to use when "
+ "device\n\n");
+ (void) fprintf(stderr, " Below options are intended for use "
+ "with other options (except -l):\n");
+ (void) fprintf(stderr, " -A ignore assertions (-A), enable "
+ "panic recovery (-AA) or both (-AAA)\n");
+ (void) fprintf(stderr, " -F attempt automatic rewind within "
+ "safe range of transaction groups\n");
+ (void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
+ "cachefile\n");
+ (void) fprintf(stderr, " -X attempt extreme rewind (does not "
+ "work with dataset)\n");
+ (void) fprintf(stderr, " -e pool is exported/destroyed/"
+ "has altroot/not in a cachefile\n");
+ (void) fprintf(stderr, " -p <path> -- use one or more with "
+ "-e to specify path to vdev dir\n");
+ (void) fprintf(stderr, " -P print numbers parsable\n");
+ (void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
@@ -146,68 +178,6 @@ fatal(const char *fmt, ...)
exit(1);
}
-static void
-dump_nvlist(nvlist_t *list, int indent)
-{
- nvpair_t *elem = NULL;
-
- while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
- switch (nvpair_type(elem)) {
- case DATA_TYPE_STRING:
- {
- char *value;
-
- VERIFY(nvpair_value_string(elem, &value) == 0);
- (void) printf("%*s%s='%s'\n", indent, "",
- nvpair_name(elem), value);
- }
- break;
-
- case DATA_TYPE_UINT64:
- {
- uint64_t value;
-
- VERIFY(nvpair_value_uint64(elem, &value) == 0);
- (void) printf("%*s%s=%llu\n", indent, "",
- nvpair_name(elem), (u_longlong_t)value);
- }
- break;
-
- case DATA_TYPE_NVLIST:
- {
- nvlist_t *value;
-
- VERIFY(nvpair_value_nvlist(elem, &value) == 0);
- (void) printf("%*s%s\n", indent, "",
- nvpair_name(elem));
- dump_nvlist(value, indent + 4);
- }
- break;
-
- case DATA_TYPE_NVLIST_ARRAY:
- {
- nvlist_t **value;
- uint_t c, count;
-
- VERIFY(nvpair_value_nvlist_array(elem, &value,
- &count) == 0);
-
- for (c = 0; c < count; c++) {
- (void) printf("%*s%s[%u]\n", indent, "",
- nvpair_name(elem), c);
- dump_nvlist(value[c], indent + 8);
- }
- }
- break;
-
- default:
-
- (void) printf("bad config type %d for %s\n",
- nvpair_type(elem), nvpair_name(elem));
- }
- }
-}
-
/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
@@ -227,6 +197,15 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
nvlist_free(nv);
}
+static void
+zdb_nicenum(uint64_t num, char *buf)
+{
+ if (dump_opt['P'])
+ (void) sprintf(buf, "%llu", (longlong_t)num);
+ else
+ nicenum(num, buf);
+}
+
const char dump_zap_stars[] = "****************************************";
const int dump_zap_width = sizeof (dump_zap_stars) - 1;
@@ -325,6 +304,13 @@ dump_none(objset_t *os, uint64_t object, void *data, size_t size)
}
/*ARGSUSED*/
+static void
+dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ (void) printf("\tUNKNOWN OBJECT TYPE\n");
+}
+
+/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
@@ -388,6 +374,79 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
/*ARGSUSED*/
static void
+dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ dump_zap_stats(os, object);
+ /* contents are printed elsewhere, properly decoded */
+}
+
+/*ARGSUSED*/
+static void
+dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ zap_cursor_t zc;
+ zap_attribute_t attr;
+
+ dump_zap_stats(os, object);
+ (void) printf("\n");
+
+ for (zap_cursor_init(&zc, os, object);
+ zap_cursor_retrieve(&zc, &attr) == 0;
+ zap_cursor_advance(&zc)) {
+ (void) printf("\t\t%s = ", attr.za_name);
+ if (attr.za_num_integers == 0) {
+ (void) printf("\n");
+ continue;
+ }
+ (void) printf(" %llx : [%d:%d:%d]\n",
+ (u_longlong_t)attr.za_first_integer,
+ (int)ATTR_LENGTH(attr.za_first_integer),
+ (int)ATTR_BSWAP(attr.za_first_integer),
+ (int)ATTR_NUM(attr.za_first_integer));
+ }
+ zap_cursor_fini(&zc);
+}
+
+/*ARGSUSED*/
+static void
+dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ zap_cursor_t zc;
+ zap_attribute_t attr;
+ uint16_t *layout_attrs;
+ int i;
+
+ dump_zap_stats(os, object);
+ (void) printf("\n");
+
+ for (zap_cursor_init(&zc, os, object);
+ zap_cursor_retrieve(&zc, &attr) == 0;
+ zap_cursor_advance(&zc)) {
+ (void) printf("\t\t%s = [", attr.za_name);
+ if (attr.za_num_integers == 0) {
+ (void) printf("\n");
+ continue;
+ }
+
+ VERIFY(attr.za_integer_length == 2);
+ layout_attrs = umem_zalloc(attr.za_num_integers *
+ attr.za_integer_length, UMEM_NOFAIL);
+
+ VERIFY(zap_lookup(os, object, attr.za_name,
+ attr.za_integer_length,
+ attr.za_num_integers, layout_attrs) == 0);
+
+ for (i = 0; i != attr.za_num_integers; i++)
+ (void) printf(" %d ", (int)layout_attrs[i]);
+ (void) printf("]\n");
+ umem_free(layout_attrs,
+ attr.za_num_integers * attr.za_integer_length);
+ }
+ zap_cursor_fini(&zc);
+}
+
+/*ARGSUSED*/
+static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
@@ -441,17 +500,17 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
*/
alloc = 0;
for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
- VERIFY(0 == dmu_read(os, smo->smo_object, offset,
+ VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
sizeof (entry), &entry, DMU_READ_PREFETCH));
if (SM_DEBUG_DECODE(entry)) {
- (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
+ (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
(u_longlong_t)(offset / sizeof (entry)),
ddata[SM_DEBUG_ACTION_DECODE(entry)],
(u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
(u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
} else {
- (void) printf("\t\t[%4llu] %c range:"
- " %08llx-%08llx size: %06llx\n",
+ (void) printf("\t [%6llu] %c range:"
+ " %010llx-%010llx size: %06llx\n",
(u_longlong_t)(offset / sizeof (entry)),
SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
(u_longlong_t)((SM_OFFSET_DECODE(entry) <<
@@ -476,14 +535,14 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
static void
dump_metaslab_stats(metaslab_t *msp)
{
- char maxbuf[5];
+ char maxbuf[32];
space_map_t *sm = &msp->ms_map;
avl_tree_t *t = sm->sm_pp_root;
int free_pct = sm->sm_space * 100 / sm->sm_size;
- nicenum(space_map_maxsize(sm), maxbuf);
+ zdb_nicenum(space_map_maxsize(sm), maxbuf);
- (void) printf("\t %20s %10lu %7s %6s %4s %4d%%\n",
+ (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
"segments", avl_numnodes(t), "maxsize", maxbuf,
"freepct", free_pct);
}
@@ -495,16 +554,16 @@ dump_metaslab(metaslab_t *msp)
spa_t *spa = vd->vdev_spa;
space_map_t *sm = &msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
- char freebuf[5];
+ char freebuf[32];
- nicenum(sm->sm_size - smo->smo_alloc, freebuf);
+ zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
(void) printf(
- "\tvdev %5llu offset %12llx spacemap %6llu free %5s\n",
+ "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
(u_longlong_t)(sm->sm_start / sm->sm_size),
(u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
- if (dump_opt['m'] > 1) {
+ if (dump_opt['m'] > 1 && !dump_opt['L']) {
mutex_enter(&msp->ms_lock);
space_map_load_wait(sm);
if (!sm->sm_loaded)
@@ -525,22 +584,52 @@ dump_metaslab(metaslab_t *msp)
}
static void
+print_vdev_metaslab_header(vdev_t *vd)
+{
+ (void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
+ (u_longlong_t)vd->vdev_id,
+ "metaslabs", (u_longlong_t)vd->vdev_ms_count,
+ "offset", "spacemap", "free");
+ (void) printf("\t%15s %19s %15s %10s\n",
+ "---------------", "-------------------",
+ "---------------", "-------------");
+}
+
+static void
dump_metaslabs(spa_t *spa)
{
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- int c, m;
+ vdev_t *vd, *rvd = spa->spa_root_vdev;
+ uint64_t m, c = 0, children = rvd->vdev_children;
(void) printf("\nMetaslabs:\n");
- for (c = 0; c < rvd->vdev_children; c++) {
- vd = rvd->vdev_child[c];
+ if (!dump_opt['d'] && zopt_objects > 0) {
+ c = zopt_object[0];
+
+ if (c >= children)
+ (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
- (void) printf("\t%-10s %-19s %-15s %-10s\n",
- "vdev", "offset", "spacemap", "free");
- (void) printf("\t%10s %19s %15s %10s\n",
- "----------", "-------------------",
- "---------------", "-------------");
+ if (zopt_objects > 1) {
+ vd = rvd->vdev_child[c];
+ print_vdev_metaslab_header(vd);
+
+ for (m = 1; m < zopt_objects; m++) {
+ if (zopt_object[m] < vd->vdev_ms_count)
+ dump_metaslab(
+ vd->vdev_ms[zopt_object[m]]);
+ else
+ (void) fprintf(stderr, "bad metaslab "
+ "number %llu\n",
+ (u_longlong_t)zopt_object[m]);
+ }
+ (void) printf("\n");
+ return;
+ }
+ children = c + 1;
+ }
+ for (; c < children; c++) {
+ vd = rvd->vdev_child[c];
+ print_vdev_metaslab_header(vd);
for (m = 0; m < vd->vdev_ms_count; m++)
dump_metaslab(vd->vdev_ms[m]);
@@ -549,6 +638,133 @@ dump_metaslabs(spa_t *spa)
}
static void
+dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
+{
+ const ddt_phys_t *ddp = dde->dde_phys;
+ const ddt_key_t *ddk = &dde->dde_key;
+ char *types[4] = { "ditto", "single", "double", "triple" };
+ char blkbuf[BP_SPRINTF_LEN];
+ blkptr_t blk;
+
+ for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ if (ddp->ddp_phys_birth == 0)
+ continue;
+ ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
+ sprintf_blkptr(blkbuf, &blk);
+ (void) printf("index %llx refcnt %llu %s %s\n",
+ (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
+ types[p], blkbuf);
+ }
+}
+
+static void
+dump_dedup_ratio(const ddt_stat_t *dds)
+{
+ double rL, rP, rD, D, dedup, compress, copies;
+
+ if (dds->dds_blocks == 0)
+ return;
+
+ rL = (double)dds->dds_ref_lsize;
+ rP = (double)dds->dds_ref_psize;
+ rD = (double)dds->dds_ref_dsize;
+ D = (double)dds->dds_dsize;
+
+ dedup = rD / D;
+ compress = rL / rP;
+ copies = rD / rP;
+
+ (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
+ "dedup * compress / copies = %.2f\n\n",
+ dedup, compress, copies, dedup * compress / copies);
+}
+
+static void
+dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+{
+ char name[DDT_NAMELEN];
+ ddt_entry_t dde;
+ uint64_t walk = 0;
+ dmu_object_info_t doi;
+ uint64_t count, dspace, mspace;
+ int error;
+
+ error = ddt_object_info(ddt, type, class, &doi);
+
+ if (error == ENOENT)
+ return;
+ ASSERT(error == 0);
+
+ if ((count = ddt_object_count(ddt, type, class)) == 0)
+ return;
+
+ dspace = doi.doi_physical_blocks_512 << 9;
+ mspace = doi.doi_fill_count * doi.doi_data_block_size;
+
+ ddt_object_name(ddt, type, class, name);
+
+ (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
+ name,
+ (u_longlong_t)count,
+ (u_longlong_t)(dspace / count),
+ (u_longlong_t)(mspace / count));
+
+ if (dump_opt['D'] < 3)
+ return;
+
+ zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
+
+ if (dump_opt['D'] < 4)
+ return;
+
+ if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
+ return;
+
+ (void) printf("%s contents:\n\n", name);
+
+ while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
+ dump_dde(ddt, &dde, walk);
+
+ ASSERT(error == ENOENT);
+
+ (void) printf("\n");
+}
+
+static void
+dump_all_ddts(spa_t *spa)
+{
+ ddt_histogram_t ddh_total = { 0 };
+ ddt_stat_t dds_total = { 0 };
+
+ for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ ddt_t *ddt = spa->spa_ddt[c];
+ for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
+ for (enum ddt_class class = 0; class < DDT_CLASSES;
+ class++) {
+ dump_ddt(ddt, type, class);
+ }
+ }
+ }
+
+ ddt_get_dedup_stats(spa, &dds_total);
+
+ if (dds_total.dds_blocks == 0) {
+ (void) printf("All DDTs are empty\n");
+ return;
+ }
+
+ (void) printf("\n");
+
+ if (dump_opt['D'] > 1) {
+ (void) printf("DDT histogram (aggregated over all DDTs):\n");
+ ddt_get_dedup_histogram(spa, &ddh_total);
+ zpool_dump_ddt(&dds_total, &ddh_total);
+ }
+
+ dump_dedup_ratio(&dds_total);
+}
+
+static void
dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
{
char *prefix = (void *)sm;
@@ -568,7 +784,7 @@ dump_dtl(vdev_t *vd, int indent)
char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
char prefix[256];
- spa_vdev_state_enter(spa);
+ spa_vdev_state_enter(spa, SCL_NONE);
required = vdev_dtl_required(vd);
(void) spa_vdev_state_exit(spa, NULL, 0);
@@ -598,6 +814,68 @@ dump_dtl(vdev_t *vd, int indent)
dump_dtl(vd->vdev_child[c], indent + 4);
}
+static void
+dump_history(spa_t *spa)
+{
+ nvlist_t **events = NULL;
+ char buf[SPA_MAXBLOCKSIZE];
+ uint64_t resid, len, off = 0;
+ uint_t num = 0;
+ int error;
+ time_t tsec;
+ struct tm t;
+ char tbuf[30];
+ char internalstr[MAXPATHLEN];
+
+ do {
+ len = sizeof (buf);
+
+ if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
+ (void) fprintf(stderr, "Unable to read history: "
+ "error %d\n", error);
+ return;
+ }
+
+ if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
+ break;
+
+ off -= resid;
+ } while (len != 0);
+
+ (void) printf("\nHistory:\n");
+ for (int i = 0; i < num; i++) {
+ uint64_t time, txg, ievent;
+ char *cmd, *intstr;
+
+ if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
+ &time) != 0)
+ continue;
+ if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
+ &cmd) != 0) {
+ if (nvlist_lookup_uint64(events[i],
+ ZPOOL_HIST_INT_EVENT, &ievent) != 0)
+ continue;
+ verify(nvlist_lookup_uint64(events[i],
+ ZPOOL_HIST_TXG, &txg) == 0);
+ verify(nvlist_lookup_string(events[i],
+ ZPOOL_HIST_INT_STR, &intstr) == 0);
+ if (ievent >= LOG_END)
+ continue;
+
+ (void) snprintf(internalstr,
+ sizeof (internalstr),
+ "[internal %s txg:%lld] %s",
+ zfs_history_event_names[ievent], txg,
+ intstr);
+ cmd = internalstr;
+ }
+ tsec = time;
+ (void) localtime_r(&tsec, &t);
+ (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
+ (void) printf("%s %s\n", tbuf, cmd);
+ }
+}
+
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
@@ -605,35 +883,48 @@ dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
}
static uint64_t
-blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
+blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
{
- if (level < 0)
- return (blkid);
+ if (dnp == NULL) {
+ ASSERT(zb->zb_level < 0);
+ if (zb->zb_object == 0)
+ return (zb->zb_blkid);
+ return (zb->zb_blkid * BP_GET_LSIZE(bp));
+ }
- return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
+ ASSERT(zb->zb_level >= 0);
+
+ return ((zb->zb_blkid <<
+ (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
}
static void
-sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
+sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
{
- dva_t *dva = bp->blk_dva;
- int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
- int i;
+ const dva_t *dva = bp->blk_dva;
+ int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
+
+ if (dump_opt['b'] >= 5) {
+ sprintf_blkptr(blkbuf, bp);
+ return;
+ }
blkbuf[0] = '\0';
- for (i = 0; i < ndvas; i++)
+ for (int i = 0; i < ndvas; i++)
(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
(u_longlong_t)DVA_GET_VDEV(&dva[i]),
(u_longlong_t)DVA_GET_OFFSET(&dva[i]),
(u_longlong_t)DVA_GET_ASIZE(&dva[i]));
- (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
+ (void) sprintf(blkbuf + strlen(blkbuf),
+ "%llxL/%llxP F=%llu B=%llu/%llu",
(u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)bp->blk_fill,
- (u_longlong_t)bp->blk_birth);
+ (u_longlong_t)bp->blk_birth,
+ (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
}
static void
@@ -646,8 +937,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
- (void) printf("%16llx ",
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
+ (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
ASSERT(zb->zb_level >= 0);
@@ -659,23 +949,15 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
}
}
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
+ sprintf_blkptr_compact(blkbuf, bp);
(void) printf("%s\n", blkbuf);
}
-#define SET_BOOKMARK(zb, objset, object, level, blkid) \
-{ \
- (zb)->zb_objset = objset; \
- (zb)->zb_object = object; \
- (zb)->zb_level = level; \
- (zb)->zb_blkid = blkid; \
-}
-
static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
blkptr_t *bp, const zbookmark_t *zb)
{
- int err;
+ int err = 0;
if (bp->blk_birth == 0)
return (0);
@@ -694,6 +976,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
return (err);
+ ASSERT(buf->b_data);
/* recursively visit blocks below this */
cbp = buf->b_data;
@@ -726,11 +1009,11 @@ dump_indirect(dnode_t *dn)
(void) printf("Indirect blocks:\n");
- SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+ SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
dn->dn_object, dnp->dn_nlevels - 1, 0);
for (j = 0; j < dnp->dn_nblkptr; j++) {
czb.zb_blkid = j;
- (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+ (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
&dnp->dn_blkptr[j], &czb);
}
@@ -743,7 +1026,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dir_phys_t *dd = data;
time_t crtime;
- char nice[6];
+ char nice[32];
if (dd == NULL)
return;
@@ -760,15 +1043,15 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)dd->dd_origin_obj);
(void) printf("\t\tchild_dir_zapobj = %llu\n",
(u_longlong_t)dd->dd_child_dir_zapobj);
- nicenum(dd->dd_used_bytes, nice);
+ zdb_nicenum(dd->dd_used_bytes, nice);
(void) printf("\t\tused_bytes = %s\n", nice);
- nicenum(dd->dd_compressed_bytes, nice);
+ zdb_nicenum(dd->dd_compressed_bytes, nice);
(void) printf("\t\tcompressed_bytes = %s\n", nice);
- nicenum(dd->dd_uncompressed_bytes, nice);
+ zdb_nicenum(dd->dd_uncompressed_bytes, nice);
(void) printf("\t\tuncompressed_bytes = %s\n", nice);
- nicenum(dd->dd_quota, nice);
+ zdb_nicenum(dd->dd_quota, nice);
(void) printf("\t\tquota = %s\n", nice);
- nicenum(dd->dd_reserved, nice);
+ zdb_nicenum(dd->dd_reserved, nice);
(void) printf("\t\treserved = %s\n", nice);
(void) printf("\t\tprops_zapobj = %llu\n",
(u_longlong_t)dd->dd_props_zapobj);
@@ -778,7 +1061,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)dd->dd_flags);
#define DO(which) \
- nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
+ zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
DO(HEAD);
DO(SNAP);
@@ -794,7 +1077,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dataset_phys_t *ds = data;
time_t crtime;
- char used[6], compressed[6], uncompressed[6], unique[6];
+ char used[32], compressed[32], uncompressed[32], unique[32];
char blkbuf[BP_SPRINTF_LEN];
if (ds == NULL)
@@ -802,11 +1085,11 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
ASSERT(size == sizeof (*ds));
crtime = ds->ds_creation_time;
- nicenum(ds->ds_used_bytes, used);
- nicenum(ds->ds_compressed_bytes, compressed);
- nicenum(ds->ds_uncompressed_bytes, uncompressed);
- nicenum(ds->ds_unique_bytes, unique);
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
+ zdb_nicenum(ds->ds_used_bytes, used);
+ zdb_nicenum(ds->ds_compressed_bytes, compressed);
+ zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
+ zdb_nicenum(ds->ds_unique_bytes, unique);
+ sprintf_blkptr(blkbuf, &ds->ds_bp);
(void) printf("\t\tdir_obj = %llu\n",
(u_longlong_t)ds->ds_dir_obj);
@@ -820,6 +1103,8 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)ds->ds_snapnames_zapobj);
(void) printf("\t\tnum_children = %llu\n",
(u_longlong_t)ds->ds_num_children);
+ (void) printf("\t\tuserrefs_obj = %llu\n",
+ (u_longlong_t)ds->ds_userrefs_obj);
(void) printf("\t\tcreation_time = %s", ctime(&crtime));
(void) printf("\t\tcreation_txg = %llu\n",
(u_longlong_t)ds->ds_creation_txg);
@@ -842,63 +1127,88 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(void) printf("\t\tbp = %s\n", blkbuf);
}
+/* ARGSUSED */
+static int
+dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ char blkbuf[BP_SPRINTF_LEN];
+
+ ASSERT(bp->blk_birth != 0);
+ sprintf_blkptr_compact(blkbuf, bp);
+ (void) printf("\t%s\n", blkbuf);
+ return (0);
+}
+
static void
-dump_bplist(objset_t *mos, uint64_t object, char *name)
+dump_bpobj(bpobj_t *bpo, char *name)
{
- bplist_t bpl = { 0 };
- blkptr_t blk, *bp = &blk;
- uint64_t itor = 0;
- char bytes[6];
- char comp[6];
- char uncomp[6];
+ char bytes[32];
+ char comp[32];
+ char uncomp[32];
if (dump_opt['d'] < 3)
return;
- mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
- VERIFY(0 == bplist_open(&bpl, mos, object));
- if (bplist_empty(&bpl)) {
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
- return;
- }
-
- nicenum(bpl.bpl_phys->bpl_bytes, bytes);
- if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
- nicenum(bpl.bpl_phys->bpl_comp, comp);
- nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
- (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
+ zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
+ if (bpo->bpo_havesubobj) {
+ zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
+ zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
+ (void) printf("\n %s: %llu local blkptrs, %llu subobjs, "
+ "%s (%s/%s comp)\n",
+ name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+ (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
bytes, comp, uncomp);
} else {
- (void) printf("\n %s: %llu entries, %s\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
+ (void) printf("\n %s: %llu blkptrs, %s\n",
+ name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, bytes);
}
- if (dump_opt['d'] < 5) {
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
+ if (dump_opt['d'] < 5)
return;
- }
(void) printf("\n");
- while (bplist_iterate(&bpl, &itor, bp) == 0) {
- char blkbuf[BP_SPRINTF_LEN];
+ (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
+}
- ASSERT(bp->blk_birth != 0);
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
- (void) printf("\tItem %3llu: %s\n",
- (u_longlong_t)itor - 1, blkbuf);
- }
+static void
+dump_deadlist(dsl_deadlist_t *dl)
+{
+ dsl_deadlist_entry_t *dle;
+ char bytes[32];
+ char comp[32];
+ char uncomp[32];
+
+ if (dump_opt['d'] < 3)
+ return;
+
+ zdb_nicenum(dl->dl_phys->dl_used, bytes);
+ zdb_nicenum(dl->dl_phys->dl_comp, comp);
+ zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
+ (void) printf("\n Deadlist: %s (%s/%s comp)\n",
+ bytes, comp, uncomp);
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
+ if (dump_opt['d'] < 4)
+ return;
+
+ (void) printf("\n");
+
+ for (dle = avl_first(&dl->dl_tree); dle;
+ dle = AVL_NEXT(&dl->dl_tree, dle)) {
+ (void) printf(" mintxg %llu -> obj %llu\n",
+ (longlong_t)dle->dle_mintxg,
+ (longlong_t)dle->dle_bpobj.bpo_object);
+
+ if (dump_opt['d'] >= 5)
+ dump_bpobj(&dle->dle_bpobj, "");
+ }
}
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
+static boolean_t sa_loaded;
+sa_attr_type_t *sa_attr_table;
static void
fuid_table_destroy()
@@ -931,12 +1241,12 @@ print_idstr(uint64_t id, const char *id_type)
}
static void
-dump_uidgid(objset_t *os, znode_phys_t *zp)
+dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
{
uint32_t uid_idx, gid_idx;
- uid_idx = FUID_INDEX(zp->zp_uid);
- gid_idx = FUID_INDEX(zp->zp_gid);
+ uid_idx = FUID_INDEX(uid);
+ gid_idx = FUID_INDEX(gid);
/* Load domain table, if not already loaded */
if (!fuid_table_loaded && (uid_idx || gid_idx)) {
@@ -951,50 +1261,111 @@ dump_uidgid(objset_t *os, znode_phys_t *zp)
fuid_table_loaded = B_TRUE;
}
- print_idstr(zp->zp_uid, "uid");
- print_idstr(zp->zp_gid, "gid");
+ print_idstr(uid, "uid");
+ print_idstr(gid, "gid");
}
/*ARGSUSED*/
static void
dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
{
- znode_phys_t *zp = data;
- time_t z_crtime, z_atime, z_mtime, z_ctime;
char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
+ sa_handle_t *hdl;
+ uint64_t xattr, rdev, gen;
+ uint64_t uid, gid, mode, fsize, parent, links;
+ uint64_t pflags;
+ uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
+ time_t z_crtime, z_atime, z_mtime, z_ctime;
+ sa_bulk_attr_t bulk[12];
+ int idx = 0;
int error;
- ASSERT(size >= sizeof (znode_phys_t));
+ if (!sa_loaded) {
+ uint64_t sa_attrs = 0;
+ uint64_t version;
+
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+ 8, 1, &version) == 0);
+ if (version >= ZPL_VERSION_SA) {
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
+ 8, 1, &sa_attrs) == 0);
+ }
+ if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
+ ZPL_END, &sa_attr_table)) != 0) {
+ (void) printf("sa_setup failed errno %d, can't "
+ "display znode contents\n", error);
+ return;
+ }
+ sa_loaded = B_TRUE;
+ }
+
+ if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
+ (void) printf("Failed to get handle for SA znode\n");
+ return;
+ }
+
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
+ &links, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
+ &mode, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
+ NULL, &parent, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
+ &fsize, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
+ acctm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
+ modtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
+ crtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
+ chgtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
+ &pflags, 8);
+
+ if (sa_bulk_lookup(hdl, bulk, idx)) {
+ (void) sa_handle_destroy(hdl);
+ return;
+ }
error = zfs_obj_to_path(os, object, path, sizeof (path));
if (error != 0) {
(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
(u_longlong_t)object);
}
-
if (dump_opt['d'] < 3) {
(void) printf("\t%s\n", path);
+ (void) sa_handle_destroy(hdl);
return;
}
- z_crtime = (time_t)zp->zp_crtime[0];
- z_atime = (time_t)zp->zp_atime[0];
- z_mtime = (time_t)zp->zp_mtime[0];
- z_ctime = (time_t)zp->zp_ctime[0];
+ z_crtime = (time_t)crtm[0];
+ z_atime = (time_t)acctm[0];
+ z_mtime = (time_t)modtm[0];
+ z_ctime = (time_t)chgtm[0];
(void) printf("\tpath %s\n", path);
- dump_uidgid(os, zp);
+ dump_uidgid(os, uid, gid);
(void) printf("\tatime %s", ctime(&z_atime));
(void) printf("\tmtime %s", ctime(&z_mtime));
(void) printf("\tctime %s", ctime(&z_ctime));
(void) printf("\tcrtime %s", ctime(&z_crtime));
- (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen);
- (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode);
- (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size);
- (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
- (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links);
- (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr);
- (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev);
+ (void) printf("\tgen %llu\n", (u_longlong_t)gen);
+ (void) printf("\tmode %llo\n", (u_longlong_t)mode);
+ (void) printf("\tsize %llu\n", (u_longlong_t)fsize);
+ (void) printf("\tparent %llu\n", (u_longlong_t)parent);
+ (void) printf("\tlinks %llu\n", (u_longlong_t)links);
+ (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
+ if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
+ sizeof (uint64_t)) == 0)
+ (void) printf("\txattr %llu\n", (u_longlong_t)xattr);
+ if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
+ sizeof (uint64_t)) == 0)
+ (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
+ sa_handle_destroy(hdl);
}
/*ARGSUSED*/
@@ -1009,7 +1380,7 @@ dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
-static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
+static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
dump_none, /* unallocated */
dump_zap, /* object directory */
dump_uint64, /* object array */
@@ -1051,6 +1422,20 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_zap, /* DSL scrub queue */
dump_zap, /* ZFS user/group used */
dump_zap, /* ZFS user/group quota */
+ dump_zap, /* snapshot refcount tags */
+ dump_ddt_zap, /* DDT ZAP object */
+ dump_zap, /* DDT statistics */
+ dump_znode, /* SA object */
+ dump_zap, /* SA Master Node */
+ dump_sa_attrs, /* SA attribute registration */
+ dump_sa_layouts, /* SA attribute layouts */
+ dump_zap, /* DSL scrub translations */
+ dump_none, /* fake dedup BP */
+ dump_zap, /* deadlist */
+ dump_none, /* deadlist hdr */
+ dump_zap, /* dsl clones */
+ dump_none, /* bpobj subobjs */
+ dump_unknown, /* Unknown type, must be last */
};
static void
@@ -1061,18 +1446,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
dnode_t *dn;
void *bonus = NULL;
size_t bsize = 0;
- char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
+ char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
+ char bonus_size[32];
char aux[50];
int error;
if (*print_header) {
- (void) printf("\n Object lvl iblk dblk lsize"
- " asize type\n");
+ (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
+ "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
+ "%full", "type");
*print_header = 0;
}
if (object == 0) {
- dn = os->os->os_meta_dnode;
+ dn = DMU_META_DNODE(os);
} else {
error = dmu_bonus_hold(os, object, FTAG, &db);
if (error)
@@ -1080,50 +1467,55 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
object, error);
bonus = db->db_data;
bsize = db->db_size;
- dn = ((dmu_buf_impl_t *)db)->db_dnode;
+ dn = DB_DNODE((dmu_buf_impl_t *)db);
}
dmu_object_info_from_dnode(dn, &doi);
- nicenum(doi.doi_metadata_block_size, iblk);
- nicenum(doi.doi_data_block_size, dblk);
- nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
- lsize);
- nicenum(doi.doi_physical_blks << 9, asize);
- nicenum(doi.doi_bonus_size, bonus_size);
+ zdb_nicenum(doi.doi_metadata_block_size, iblk);
+ zdb_nicenum(doi.doi_data_block_size, dblk);
+ zdb_nicenum(doi.doi_max_offset, lsize);
+ zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
+ zdb_nicenum(doi.doi_bonus_size, bonus_size);
+ (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
+ doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
+ doi.doi_max_offset);
aux[0] = '\0';
if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
- zio_checksum_table[doi.doi_checksum].ci_name);
+ ZDB_CHECKSUM_NAME(doi.doi_checksum));
}
if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
- zio_compress_table[doi.doi_compress].ci_name);
+ ZDB_COMPRESS_NAME(doi.doi_compress));
}
- (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
- asize, dmu_ot[doi.doi_type].ot_name, aux);
+ (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
+ (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
+ asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
- (void) printf("%10s %3s %5s %5s %5s %5s %s\n",
- "", "", "", "", bonus_size, "bonus",
- dmu_ot[doi.doi_bonus_type].ot_name);
+ (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
+ "", "", "", "", "", bonus_size, "bonus",
+ ZDB_OT_NAME(doi.doi_bonus_type));
}
if (verbosity >= 4) {
- (void) printf("\tdnode flags: %s%s\n",
+ (void) printf("\tdnode flags: %s%s%s\n",
(dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
"USED_BYTES " : "",
(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
- "USERUSED_ACCOUNTED " : "");
+ "USERUSED_ACCOUNTED " : "",
+ (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
+ "SPILL_BLKPTR" : "");
(void) printf("\tdnode maxblkid: %llu\n",
(longlong_t)dn->dn_phys->dn_maxblkid);
- object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
- object_viewer[doi.doi_type](os, object, NULL, 0);
+ object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
+ bonus, bsize);
+ object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
*print_header = 1;
}
@@ -1145,6 +1537,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
for (;;) {
+ char segsize[32];
error = dnode_next_offset(dn,
0, &start, minlvl, blkfill, 0);
if (error)
@@ -1152,7 +1545,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
end = start;
error = dnode_next_offset(dn,
DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
- nicenum(end - start, segsize);
+ zdb_nicenum(end - start, segsize);
(void) printf("\t\tsegment [%016llx, %016llx)"
" size %5s\n", (u_longlong_t)start,
(u_longlong_t)end, segsize);
@@ -1175,7 +1568,7 @@ dump_dir(objset_t *os)
dmu_objset_stats_t dds;
uint64_t object, object_count;
uint64_t refdbytes, usedobjs, scratch;
- char numbuf[8];
+ char numbuf[32];
char blkbuf[BP_SPRINTF_LEN + 20];
char osname[MAXNAMELEN];
char *type = "UNKNOWN";
@@ -1190,21 +1583,20 @@ dump_dir(objset_t *os)
if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
- usedobjs = os->os->os_rootbp->blk_fill;
- refdbytes = os->os->os_spa->spa_dsl_pool->
+ usedobjs = os->os_rootbp->blk_fill;
+ refdbytes = os->os_spa->spa_dsl_pool->
dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}
- ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
+ ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
- nicenum(refdbytes, numbuf);
+ zdb_nicenum(refdbytes, numbuf);
if (verbosity >= 4) {
- (void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
- (void) sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
+ (void) sprintf(blkbuf, ", rootbp ");
+ (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
} else {
blkbuf[0] = '\0';
}
@@ -1217,18 +1609,6 @@ dump_dir(objset_t *os)
(u_longlong_t)dds.dds_creation_txg,
numbuf, (u_longlong_t)usedobjs, blkbuf);
- dump_intent_log(dmu_objset_zil(os));
-
- if (dmu_objset_ds(os) != NULL)
- dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
- dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
-
- if (verbosity < 2)
- return;
-
- if (os->os->os_rootbp->blk_birth == 0)
- return;
-
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
@@ -1237,10 +1617,22 @@ dump_dir(objset_t *os)
return;
}
+ if (dump_opt['i'] != 0 || verbosity >= 2)
+ dump_intent_log(dmu_objset_zil(os));
+
+ if (dmu_objset_ds(os) != NULL)
+ dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
+
+ if (verbosity < 2)
+ return;
+
+ if (os->os_rootbp->blk_birth == 0)
+ return;
+
dump_object(os, 0, verbosity, &print_header);
object_count = 0;
- if (os->os->os_userused_dnode &&
- os->os->os_userused_dnode->dn_type != 0) {
+ if (DMU_USERUSED_DNODE(os) != NULL &&
+ DMU_USERUSED_DNODE(os)->dn_type != 0) {
dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
}
@@ -1262,11 +1654,11 @@ dump_dir(objset_t *os)
}
static void
-dump_uberblock(uberblock_t *ub)
+dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
{
time_t timestamp = ub->ub_timestamp;
- (void) printf("Uberblock\n\n");
+ (void) printf(header ? header : "");
(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
@@ -1275,25 +1667,34 @@ dump_uberblock(uberblock_t *ub)
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
if (dump_opt['u'] >= 3) {
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
+ sprintf_blkptr(blkbuf, &ub->ub_rootbp);
(void) printf("\trootbp = %s\n", blkbuf);
}
- (void) printf("\n");
+ (void) printf(footer ? footer : "");
}
static void
-dump_config(const char *pool)
+dump_config(spa_t *spa)
{
- spa_t *spa = NULL;
+ dmu_buf_t *db;
+ size_t nvsize = 0;
+ int error = 0;
+
+
+ error = dmu_bonus_hold(spa->spa_meta_objset,
+ spa->spa_config_object, FTAG, &db);
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (pool == NULL)
- (void) printf("%s\n", spa_name(spa));
- if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
- dump_nvlist(spa->spa_config, 4);
+ if (error == 0) {
+ nvsize = *(uint64_t *)db->db_data;
+ dmu_buf_rele(db, FTAG);
+
+ (void) printf("\nMOS Configuration:\n");
+ dump_packed_nvlist(spa->spa_meta_objset,
+ spa->spa_config_object, (void *)&nvsize, 1);
+ } else {
+ (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
+ (u_longlong_t)spa->spa_config_object, error);
}
- mutex_exit(&spa_namespace_lock);
}
static void
@@ -1342,41 +1743,75 @@ dump_cachefile(const char *cachefile)
nvlist_free(config);
}
+#define ZDB_MAX_UB_HEADER_SIZE 32
+
+static void
+dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
+{
+ vdev_t vd;
+ vdev_t *vdp = &vd;
+ char header[ZDB_MAX_UB_HEADER_SIZE];
+
+ vd.vdev_ashift = ashift;
+ vdp->vdev_top = vdp;
+
+ for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
+ uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
+ uberblock_t *ub = (void *)((char *)lbl + uoff);
+
+ if (uberblock_verify(ub))
+ continue;
+ (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
+ "Uberblock[%d]\n", i);
+ dump_uberblock(ub, header, "");
+ }
+}
+
static void
dump_label(const char *dev)
{
int fd;
vdev_label_t label;
- char *buf = label.vl_vdev_phys.vp_nvlist;
+ char *path, *buf = label.vl_vdev_phys.vp_nvlist;
size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
struct stat64 statbuf;
- uint64_t psize;
- int l;
+ uint64_t psize, ashift;
+ int len = strlen(dev) + 1;
+
+ if (strncmp(dev, "/dev/dsk/", 9) == 0) {
+ len++;
+ path = malloc(len);
+ (void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
+ } else {
+ path = strdup(dev);
+ }
- if ((fd = open64(dev, O_RDONLY)) < 0) {
- (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
+ if ((fd = open64(path, O_RDONLY)) < 0) {
+ (void) printf("cannot open '%s': %s\n", path, strerror(errno));
+ free(path);
exit(1);
}
if (fstat64(fd, &statbuf) != 0) {
- (void) printf("failed to stat '%s': %s\n", dev,
+ (void) printf("failed to stat '%s': %s\n", path,
strerror(errno));
+ free(path);
+ (void) close(fd);
exit(1);
}
- if (S_ISCHR(statbuf.st_mode)) {
- if (ioctl(fd, DIOCGMEDIASIZE, &statbuf.st_size) == -1) {
- (void) printf("failed to get size of '%s': %s\n", dev,
- strerror(errno));
- exit(1);
- }
+ if (S_ISBLK(statbuf.st_mode)) {
+ (void) printf("cannot use '%s': character device required\n",
+ path);
+ free(path);
+ (void) close(fd);
+ exit(1);
}
psize = statbuf.st_size;
psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
- for (l = 0; l < VDEV_LABELS; l++) {
-
+ for (int l = 0; l < VDEV_LABELS; l++) {
nvlist_t *config = NULL;
(void) printf("--------------------------------------------\n");
@@ -1391,105 +1826,47 @@ dump_label(const char *dev)
if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
(void) printf("failed to unpack label %d\n", l);
- continue;
+ ashift = SPA_MINBLOCKSHIFT;
+ } else {
+ nvlist_t *vdev_tree = NULL;
+
+ dump_nvlist(config, 4);
+ if ((nvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
+ (nvlist_lookup_uint64(vdev_tree,
+ ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
+ ashift = SPA_MINBLOCKSHIFT;
+ nvlist_free(config);
}
- dump_nvlist(config, 4);
- nvlist_free(config);
+ if (dump_opt['u'])
+ dump_label_uberblocks(&label, ashift);
}
+
+ free(path);
+ (void) close(fd);
}
/*ARGSUSED*/
static int
-dump_one_dir(char *dsname, void *arg)
+dump_one_dir(const char *dsname, void *arg)
{
int error;
objset_t *os;
- error = dmu_objset_open(dsname, DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os);
+ error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
if (error) {
- (void) printf("Could not open %s\n", dsname);
+ (void) printf("Could not open %s, error %d\n", dsname, error);
return (0);
}
dump_dir(os);
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
fuid_table_destroy();
+ sa_loaded = B_FALSE;
return (0);
}
-static void
-zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
-{
- vdev_t *vd = sm->sm_ppd;
-
- (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
- (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_load(space_map_t *sm)
-{
-}
-
-static void
-zdb_space_map_unload(space_map_t *sm)
-{
- space_map_vacate(sm, zdb_leak, sm);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
-{
-}
-
-static space_map_ops_t zdb_space_map_ops = {
- zdb_space_map_load,
- zdb_space_map_unload,
- NULL, /* alloc */
- zdb_space_map_claim,
- NULL, /* free */
- NULL /* maxsize */
-};
-
-static void
-zdb_leak_init(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
- SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
- msp->ms_map.sm_ppd = vd;
- mutex_exit(&msp->ms_lock);
- }
- }
-}
-
-static void
-zdb_leak_fini(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
- mutex_exit(&msp->ms_lock);
- }
- }
-}
-
/*
- * Verify that the sum of the sizes of all blocks in the pool adds up
- * to the SPA's sa_alloc total.
+ * Block statistics.
*/
typedef struct zdb_blkstats {
uint64_t zb_asize;
@@ -1498,24 +1875,45 @@ typedef struct zdb_blkstats {
uint64_t zb_count;
} zdb_blkstats_t;
-#define DMU_OT_DEFERRED DMU_OT_NONE
-#define DMU_OT_TOTAL DMU_OT_NUMTYPES
+/*
+ * Extended object types to report deferred frees and dedup auto-ditto blocks.
+ */
+#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
+#define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
+#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2)
+
+static char *zdb_ot_extname[] = {
+ "deferred free",
+ "dedup ditto",
+ "Total",
+};
#define ZB_TOTAL DN_MAX_LEVELS
typedef struct zdb_cb {
- zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
+ zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
+ uint64_t zcb_dedup_asize;
+ uint64_t zcb_dedup_blocks;
uint64_t zcb_errors[256];
int zcb_readfails;
int zcb_haderrors;
+ spa_t *zcb_spa;
} zdb_cb_t;
static void
-zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
+zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
+ dmu_object_type_t type)
{
+ uint64_t refcnt = 0;
+
+ ASSERT(type < ZDB_OT_TOTAL);
+
+ if (zilog && zil_bp_tree_add(zilog, bp) != 0)
+ return;
+
for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
- int t = (i & 1) ? type : DMU_OT_TOTAL;
+ int t = (i & 1) ? type : ZDB_OT_TOTAL;
zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
zb->zb_asize += BP_GET_ASIZE(bp);
@@ -1524,127 +1922,258 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
zb->zb_count++;
}
- if (dump_opt['S']) {
- boolean_t print_sig;
-
- print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
- BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
-
- if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
- print_sig = B_FALSE;
-
- if (print_sig) {
- (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
- "%llx:%llx:%llx:%llx\n",
- (u_longlong_t)BP_GET_LEVEL(bp),
- (longlong_t)BP_GET_PSIZE(bp),
- (longlong_t)BP_GET_NDVAS(bp),
- dmu_ot[BP_GET_TYPE(bp)].ot_name,
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
+ if (dump_opt['L'])
+ return;
+
+ if (BP_GET_DEDUP(bp)) {
+ ddt_t *ddt;
+ ddt_entry_t *dde;
+
+ ddt = ddt_select(zcb->zcb_spa, bp);
+ ddt_enter(ddt);
+ dde = ddt_lookup(ddt, bp, B_FALSE);
+
+ if (dde == NULL) {
+ refcnt = 0;
+ } else {
+ ddt_phys_t *ddp = ddt_phys_select(dde, bp);
+ ddt_phys_decref(ddp);
+ refcnt = ddp->ddp_refcnt;
+ if (ddt_phys_total_refcnt(dde) == 0)
+ ddt_remove(ddt, dde);
}
+ ddt_exit(ddt);
}
- if (!dump_opt['L'])
- VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
- NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
+ VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
+ refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
+ bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
+/* ARGSUSED */
static int
-zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
- const dnode_phys_t *dnp, void *arg)
+zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
zdb_cb_t *zcb = arg;
char blkbuf[BP_SPRINTF_LEN];
dmu_object_type_t type;
- boolean_t is_l0_metadata;
+ boolean_t is_metadata;
if (bp == NULL)
return (0);
type = BP_GET_TYPE(bp);
- zdb_count_block(spa, zcb, bp, type);
+ zdb_count_block(zcb, zilog, bp, type);
- /*
- * if we do metadata-only checksumming there's no need to checksum
- * indirect blocks here because it is done during traverse
- */
- is_l0_metadata = (BP_GET_LEVEL(bp) == 0 && type < DMU_OT_NUMTYPES &&
- dmu_ot[type].ot_metadata);
+ is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
+
+ if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
+ int ioerr;
+ size_t size = BP_GET_PSIZE(bp);
+ void *data = malloc(size);
+ int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
- if (dump_opt['c'] > 1 || dump_opt['S'] ||
- (dump_opt['c'] && is_l0_metadata)) {
- int ioerr, size;
- void *data;
+ /* If it's an intent log block, failure is expected. */
+ if (zb->zb_level == ZB_ZIL_LEVEL)
+ flags |= ZIO_FLAG_SPECULATIVE;
- size = BP_GET_LSIZE(bp);
- data = malloc(size);
ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
+ NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
+
free(data);
- /* We expect io errors on intent log */
- if (ioerr && type != DMU_OT_INTENT_LOG) {
+ if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;
if (dump_opt['b'] >= 2)
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+ sprintf_blkptr(blkbuf, bp);
else
blkbuf[0] = '\0';
- if (!dump_opt['S']) {
- (void) printf("zdb_blkptr_cb: "
- "Got error %d reading "
- "<%llu, %llu, %lld, %llx> %s -- skipping\n",
- ioerr,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid,
- blkbuf);
- }
+ (void) printf("zdb_blkptr_cb: "
+ "Got error %d reading "
+ "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+ ioerr,
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (u_longlong_t)zb->zb_level,
+ (u_longlong_t)zb->zb_blkid,
+ blkbuf);
}
}
zcb->zcb_readfails = 0;
if (dump_opt['b'] >= 4) {
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
- (void) printf("objset %llu object %llu offset 0x%llx %s\n",
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("objset %llu object %llu "
+ "level %lld offset 0x%llx %s\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
+ (longlong_t)zb->zb_level,
+ (u_longlong_t)blkid2offset(dnp, bp, zb),
blkbuf);
}
return (0);
}
+static void
+zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ vdev_t *vd = sm->sm_ppd;
+
+ (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
+ (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
+}
+
+/* ARGSUSED */
+static void
+zdb_space_map_load(space_map_t *sm)
+{
+}
+
+static void
+zdb_space_map_unload(space_map_t *sm)
+{
+ space_map_vacate(sm, zdb_leak, sm);
+}
+
+/* ARGSUSED */
+static void
+zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
+{
+}
+
+static space_map_ops_t zdb_space_map_ops = {
+ zdb_space_map_load,
+ zdb_space_map_unload,
+ NULL, /* alloc */
+ zdb_space_map_claim,
+ NULL, /* free */
+ NULL /* maxsize */
+};
+
+static void
+zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
+{
+ ddt_bookmark_t ddb = { 0 };
+ ddt_entry_t dde;
+ int error;
+
+ while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
+ blkptr_t blk;
+ ddt_phys_t *ddp = dde.dde_phys;
+
+ if (ddb.ddb_class == DDT_CLASS_UNIQUE)
+ return;
+
+ ASSERT(ddt_phys_total_refcnt(&dde) > 1);
+
+ for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ if (ddp->ddp_phys_birth == 0)
+ continue;
+ ddt_bp_create(ddb.ddb_checksum,
+ &dde.dde_key, ddp, &blk);
+ if (p == DDT_PHYS_DITTO) {
+ zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
+ } else {
+ zcb->zcb_dedup_asize +=
+ BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
+ zcb->zcb_dedup_blocks++;
+ }
+ }
+ if (!dump_opt['L']) {
+ ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+ ddt_enter(ddt);
+ VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
+ ddt_exit(ddt);
+ }
+ }
+
+ ASSERT(error == ENOENT);
+}
+
+static void
+zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
+{
+ zcb->zcb_spa = spa;
+
+ if (!dump_opt['L']) {
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ VERIFY(space_map_load(&msp->ms_map,
+ &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
+ spa->spa_meta_objset) == 0);
+ msp->ms_map.sm_ppd = vd;
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+ }
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ zdb_ddt_leak_init(spa, zcb);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+}
+
+static void
+zdb_leak_fini(spa_t *spa)
+{
+ if (!dump_opt['L']) {
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+ }
+}
+
+/* ARGSUSED */
+static int
+count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ zdb_cb_t *zcb = arg;
+
+ if (dump_opt['b'] >= 4) {
+ char blkbuf[BP_SPRINTF_LEN];
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("[%s] %s\n",
+ "deferred free", blkbuf);
+ }
+ zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
+ return (0);
+}
+
static int
dump_block_stats(spa_t *spa)
{
zdb_cb_t zcb = { 0 };
zdb_blkstats_t *zb, *tzb;
- uint64_t alloc, space, logalloc;
- vdev_t *rvd = spa->spa_root_vdev;
+ uint64_t norm_alloc, norm_space, total_alloc, total_found;
+ int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
int leaks = 0;
- int c, e;
- if (!dump_opt['S']) {
- (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
- (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
- (dump_opt['c'] == 1) ? "metadata " : "",
- dump_opt['c'] ? "checksums " : "",
- (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
- !dump_opt['L'] ? "nothing leaked " : "");
- }
+ (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
+ (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
+ (dump_opt['c'] == 1) ? "metadata " : "",
+ dump_opt['c'] ? "checksums " : "",
+ (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
+ !dump_opt['L'] ? "nothing leaked " : "");
/*
* Load all space maps as SM_ALLOC maps, then traverse the pool
@@ -1654,39 +2183,25 @@ dump_block_stats(spa_t *spa)
* it's not part of any space map) is a double allocation,
* reference to a freed block, or an unclaimed log block.
*/
- if (!dump_opt['L'])
- zdb_leak_init(spa);
+ zdb_leak_init(spa, &zcb);
/*
* If there's a deferred-free bplist, process that first.
*/
- if (spa->spa_sync_bplist_obj != 0) {
- bplist_t *bpl = &spa->spa_sync_bplist;
- blkptr_t blk;
- uint64_t itor = 0;
+ (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
+ count_block_cb, &zcb, NULL);
+ (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
+ count_block_cb, &zcb, NULL);
- VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
- spa->spa_sync_bplist_obj));
+ if (dump_opt['c'] > 1)
+ flags |= TRAVERSE_PREFETCH_DATA;
- while (bplist_iterate(bpl, &itor, &blk) == 0) {
- if (dump_opt['b'] >= 4) {
- char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
- (void) printf("[%s] %s\n",
- "deferred free", blkbuf);
- }
- zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
- }
-
- bplist_close(bpl);
- }
-
- zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
+ zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
- if (zcb.zcb_haderrors && !dump_opt['S']) {
+ if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
- for (e = 0; e < 256; e++) {
+ for (int e = 0; e < 256; e++) {
if (zcb.zcb_errors[e] != 0) {
(void) printf("\t%5d %llu\n",
e, (u_longlong_t)zcb.zcb_errors[e]);
@@ -1697,43 +2212,27 @@ dump_block_stats(spa_t *spa)
/*
* Report any leaked segments.
*/
- if (!dump_opt['L'])
- zdb_leak_fini(spa);
+ zdb_leak_fini(spa);
- /*
- * If we're interested in printing out the blkptr signatures,
- * return now as we don't print out anything else (including
- * errors and leaks).
- */
- if (dump_opt['S'])
- return (zcb.zcb_haderrors ? 3 : 0);
-
- alloc = spa_get_alloc(spa);
- space = spa_get_space(spa);
-
- /*
- * Log blocks allocated from a separate log device don't count
- * as part of the normal pool space; factor them in here.
- */
- logalloc = 0;
+ tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
- for (c = 0; c < rvd->vdev_children; c++)
- if (rvd->vdev_child[c]->vdev_islog)
- logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
+ norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ norm_space = metaslab_class_get_space(spa_normal_class(spa));
- tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
+ total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
+ total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
- if (tzb->zb_asize == alloc + logalloc) {
+ if (total_found == total_alloc) {
if (!dump_opt['L'])
(void) printf("\n\tNo leaks (block sum matches space"
" maps exactly)\n");
} else {
(void) printf("block traversal size %llu != alloc %llu "
"(%s %lld)\n",
- (u_longlong_t)tzb->zb_asize,
- (u_longlong_t)alloc + logalloc,
+ (u_longlong_t)total_found,
+ (u_longlong_t)total_alloc,
(dump_opt['L']) ? "unreachable" : "leaked",
- (longlong_t)(alloc + logalloc - tzb->zb_asize));
+ (longlong_t)(total_alloc - total_found));
leaks = 1;
}
@@ -1743,33 +2242,41 @@ dump_block_stats(spa_t *spa)
(void) printf("\n");
(void) printf("\tbp count: %10llu\n",
(u_longlong_t)tzb->zb_count);
- (void) printf("\tbp logical: %10llu\t avg: %6llu\n",
+ (void) printf("\tbp logical: %10llu avg: %6llu\n",
(u_longlong_t)tzb->zb_lsize,
(u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
- (void) printf("\tbp physical: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
+ (void) printf("\tbp physical: %10llu avg:"
+ " %6llu compression: %6.2f\n",
(u_longlong_t)tzb->zb_psize,
(u_longlong_t)(tzb->zb_psize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_psize);
- (void) printf("\tbp allocated: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
+ (void) printf("\tbp allocated: %10llu avg:"
+ " %6llu compression: %6.2f\n",
(u_longlong_t)tzb->zb_asize,
(u_longlong_t)(tzb->zb_asize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_asize);
- (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
- (u_longlong_t)alloc, 100.0 * alloc / space);
+ (void) printf("\tbp deduped: %10llu ref>1:"
+ " %6llu deduplication: %6.2f\n",
+ (u_longlong_t)zcb.zcb_dedup_asize,
+ (u_longlong_t)zcb.zcb_dedup_blocks,
+ (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
+ (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
+ (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
if (dump_opt['b'] >= 2) {
int l, t, level;
(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
"\t avg\t comp\t%%Total\tType\n");
- for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
- char csize[6], lsize[6], psize[6], asize[6], avg[6];
+ for (t = 0; t <= ZDB_OT_TOTAL; t++) {
+ char csize[32], lsize[32], psize[32], asize[32];
+ char avg[32];
char *typename;
- typename = t == DMU_OT_DEFERRED ? "deferred free" :
- t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
+ if (t < DMU_OT_NUMTYPES)
+ typename = dmu_ot[t].ot_name;
+ else
+ typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
(void) printf("%6s\t%5s\t%5s\t%5s"
@@ -1799,11 +2306,11 @@ dump_block_stats(spa_t *spa)
zcb.zcb_type[ZB_TOTAL][t].zb_asize)
continue;
- nicenum(zb->zb_count, csize);
- nicenum(zb->zb_lsize, lsize);
- nicenum(zb->zb_psize, psize);
- nicenum(zb->zb_asize, asize);
- nicenum(zb->zb_asize / zb->zb_count, avg);
+ zdb_nicenum(zb->zb_count, csize);
+ zdb_nicenum(zb->zb_lsize, lsize);
+ zdb_nicenum(zb->zb_psize, psize);
+ zdb_nicenum(zb->zb_asize, asize);
+ zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
"\t%5.2f\t%6.2f\t",
@@ -1831,36 +2338,157 @@ dump_block_stats(spa_t *spa)
return (0);
}
+typedef struct zdb_ddt_entry {
+ ddt_key_t zdde_key;
+ uint64_t zdde_ref_blocks;
+ uint64_t zdde_ref_lsize;
+ uint64_t zdde_ref_psize;
+ uint64_t zdde_ref_dsize;
+ avl_node_t zdde_node;
+} zdb_ddt_entry_t;
+
+/* ARGSUSED */
+static int
+zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+ avl_tree_t *t = arg;
+ avl_index_t where;
+ zdb_ddt_entry_t *zdde, zdde_search;
+
+ if (bp == NULL)
+ return (0);
+
+ if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
+ (void) printf("traversing objset %llu, %llu objects, "
+ "%lu blocks so far\n",
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)bp->blk_fill,
+ avl_numnodes(t));
+ }
+
+ if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
+ BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
+ return (0);
+
+ ddt_key_fill(&zdde_search.zdde_key, bp);
+
+ zdde = avl_find(t, &zdde_search, &where);
+
+ if (zdde == NULL) {
+ zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
+ zdde->zdde_key = zdde_search.zdde_key;
+ avl_insert(t, zdde, where);
+ }
+
+ zdde->zdde_ref_blocks += 1;
+ zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
+ zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
+ zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
+
+ return (0);
+}
+
+static void
+dump_simulated_ddt(spa_t *spa)
+{
+ avl_tree_t t;
+ void *cookie = NULL;
+ zdb_ddt_entry_t *zdde;
+ ddt_histogram_t ddh_total = { 0 };
+ ddt_stat_t dds_total = { 0 };
+
+ avl_create(&t, ddt_entry_compare,
+ sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
+ zdb_ddt_add_cb, &t);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+
+ while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
+ ddt_stat_t dds;
+ uint64_t refcnt = zdde->zdde_ref_blocks;
+ ASSERT(refcnt != 0);
+
+ dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
+ dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
+ dds.dds_psize = zdde->zdde_ref_psize / refcnt;
+ dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
+
+ dds.dds_ref_blocks = zdde->zdde_ref_blocks;
+ dds.dds_ref_lsize = zdde->zdde_ref_lsize;
+ dds.dds_ref_psize = zdde->zdde_ref_psize;
+ dds.dds_ref_dsize = zdde->zdde_ref_dsize;
+
+ ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
+
+ umem_free(zdde, sizeof (*zdde));
+ }
+
+ avl_destroy(&t);
+
+ ddt_histogram_stat(&dds_total, &ddh_total);
+
+ (void) printf("Simulated DDT histogram:\n");
+
+ zpool_dump_ddt(&dds_total, &ddh_total);
+
+ dump_dedup_ratio(&dds_total);
+}
+
static void
dump_zpool(spa_t *spa)
{
dsl_pool_t *dp = spa_get_dsl(spa);
int rc = 0;
+ if (dump_opt['S']) {
+ dump_simulated_ddt(spa);
+ return;
+ }
+
+ if (!dump_opt['e'] && dump_opt['C'] > 1) {
+ (void) printf("\nCached configuration:\n");
+ dump_nvlist(spa->spa_config, 8);
+ }
+
+ if (dump_opt['C'])
+ dump_config(spa);
+
if (dump_opt['u'])
- dump_uberblock(&spa->spa_uberblock);
+ dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
- if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) {
+ if (dump_opt['D'])
+ dump_all_ddts(spa);
+
+ if (dump_opt['d'] > 2 || dump_opt['m'])
+ dump_metaslabs(spa);
+
+ if (dump_opt['d'] || dump_opt['i']) {
dump_dir(dp->dp_meta_objset);
if (dump_opt['d'] >= 3) {
- dump_bplist(dp->dp_meta_objset,
- spa->spa_sync_bplist_obj, "Deferred frees");
+ dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
+ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+ dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
+ "Pool frees");
+ }
dump_dtl(spa->spa_root_vdev, 0);
}
-
- if (dump_opt['d'] >= 3 || dump_opt['m'])
- dump_metaslabs(spa);
-
- (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
- DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+ (void) dmu_objset_find(spa_name(spa), dump_one_dir,
+ NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
}
-
- if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
+ if (dump_opt['b'] || dump_opt['c'])
rc = dump_block_stats(spa);
if (dump_opt['s'])
show_pool_stats(spa);
+ if (dump_opt['h'])
+ dump_history(spa);
+
if (rc != 0)
exit(rc);
}
@@ -1879,51 +2507,13 @@ int flagbits[256];
static void
zdb_print_blkptr(blkptr_t *bp, int flags)
{
- dva_t *dva = bp->blk_dva;
- int d;
+ char blkbuf[BP_SPRINTF_LEN];
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
- /*
- * Super-ick warning: This code is also duplicated in
- * cmd/mdb/common/modules/zfs/zfs.c . Yeah, I hate code
- * replication, too.
- */
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
- (longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]));
- (void) printf("\tDVA[%d]: GANG: %-5s GRID: %04llx\t"
- "ASIZE: %llx\n", d,
- DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
- (longlong_t)DVA_GET_GRID(&dva[d]),
- (longlong_t)DVA_GET_ASIZE(&dva[d]));
- (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
- (u_longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]),
- (longlong_t)BP_GET_PSIZE(bp),
- BP_SHOULD_BYTESWAP(bp) ? "e" : "",
- !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
- "d" : "",
- DVA_GET_GANG(&dva[d]) ? "g" : "",
- BP_GET_COMPRESS(bp) != 0 ? "d" : "");
- }
- (void) printf("\tLSIZE: %-16llx\t\tPSIZE: %llx\n",
- (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
- (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
- BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
- dmu_ot[BP_GET_TYPE(bp)].ot_name);
- (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n",
- (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
- (u_longlong_t)bp->blk_fill);
- (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n",
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
- (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n",
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
+
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("%s\n", blkbuf);
}
static void
@@ -1946,7 +2536,7 @@ zdb_dump_block_raw(void *buf, uint64_t size, int flags)
{
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array(buf, size);
- (void) write(2, buf, size);
+ (void) write(1, buf, size);
}
static void
@@ -2049,31 +2639,30 @@ name:
* flags - A string of characters specifying options
* b: Decode a blkptr at given offset within block
* *c: Calculate and display checksums
- * *d: Decompress data before dumping
+ * d: Decompress data before dumping
* e: Byteswap data before dumping
- * *g: Display data as a gang block header
- * *i: Display as an indirect block
+ * g: Display data as a gang block header
+ * i: Display as an indirect block
* p: Do I/O to physical offset
* r: Dump raw data to stdout
*
* * = not yet implemented
*/
static void
-zdb_read_block(char *thing, spa_t **spap)
+zdb_read_block(char *thing, spa_t *spa)
{
- spa_t *spa = *spap;
+ blkptr_t blk, *bp = &blk;
+ dva_t *dva = bp->blk_dva;
int flags = 0;
- uint64_t offset = 0, size = 0, blkptr_offset = 0;
+ uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
zio_t *zio;
vdev_t *vd;
- void *buf;
- char *s, *p, *dup, *pool, *vdev, *flagstr;
- int i, error, zio_flags;
+ void *pbuf, *lbuf, *buf;
+ char *s, *p, *dup, *vdev, *flagstr;
+ int i, error;
dup = strdup(thing);
s = strtok(dup, ":");
- pool = s ? s : "";
- s = strtok(NULL, ":");
vdev = s ? s : "";
s = strtok(NULL, ":");
offset = strtoull(s ? s : "", NULL, 16);
@@ -2107,7 +2696,7 @@ zdb_read_block(char *thing, spa_t **spap)
flags |= bit;
/* If it's not something with an argument, keep going */
- if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
+ if ((bit & (ZDB_FLAG_CHECKSUM |
ZDB_FLAG_PRINT_BLKPTR)) == 0)
continue;
@@ -2122,16 +2711,6 @@ zdb_read_block(char *thing, spa_t **spap)
}
}
- if (spa == NULL || strcmp(spa_name(spa), pool) != 0) {
- if (spa)
- spa_close(spa, (void *)zdb_read_block);
- error = spa_open(pool, spap, (void *)zdb_read_block);
- if (error)
- fatal("Failed to open pool '%s': %s",
- pool, strerror(error));
- spa = *spap;
- }
-
vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
if (vd == NULL) {
(void) printf("***Invalid vdev: %s\n", vdev);
@@ -2139,22 +2718,58 @@ zdb_read_block(char *thing, spa_t **spap)
return;
} else {
if (vd->vdev_path)
- (void) printf("Found vdev: %s\n", vd->vdev_path);
+ (void) fprintf(stderr, "Found vdev: %s\n",
+ vd->vdev_path);
else
- (void) printf("Found vdev type: %s\n",
+ (void) fprintf(stderr, "Found vdev type: %s\n",
vd->vdev_ops->vdev_op_type);
}
- buf = umem_alloc(size, UMEM_NOFAIL);
+ psize = size;
+ lsize = size;
+
+ pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
- zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
- ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
+ BP_ZERO(bp);
+
+ DVA_SET_VDEV(&dva[0], vd->vdev_id);
+ DVA_SET_OFFSET(&dva[0], offset);
+ DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
+ DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
+
+ BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
+
+ BP_SET_LSIZE(bp, lsize);
+ BP_SET_PSIZE(bp, psize);
+ BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
+ BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
+ BP_SET_TYPE(bp, DMU_OT_NONE);
+ BP_SET_LEVEL(bp, 0);
+ BP_SET_DEDUP(bp, 0);
+ BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
zio = zio_root(spa, NULL, NULL, 0);
- /* XXX todo - cons up a BP so RAID-Z will be happy */
- zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
- ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
+
+ if (vd == vd->vdev_top) {
+ /*
+ * Treat this as a normal block read.
+ */
+ zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
+ ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
+ } else {
+ /*
+ * Treat this as a vdev child I/O.
+ */
+ zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
+ ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
+ ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
+ }
+
error = zio_wait(zio);
spa_config_exit(spa, SCL_STATE, FTAG);
@@ -2163,6 +2778,52 @@ zdb_read_block(char *thing, spa_t **spap)
goto out;
}
+ if (flags & ZDB_FLAG_DECOMPRESS) {
+ /*
+ * We don't know how the data was compressed, so just try
+ * every decompress function at every inflated blocksize.
+ */
+ enum zio_compress c;
+ void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+
+ bcopy(pbuf, pbuf2, psize);
+
+ VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
+ SPA_MAXBLOCKSIZE - psize) == 0);
+
+ VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
+ SPA_MAXBLOCKSIZE - psize) == 0);
+
+ for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
+ lsize -= SPA_MINBLOCKSIZE) {
+ for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
+ if (zio_decompress_data(c, pbuf, lbuf,
+ psize, lsize) == 0 &&
+ zio_decompress_data(c, pbuf2, lbuf2,
+ psize, lsize) == 0 &&
+ bcmp(lbuf, lbuf2, lsize) == 0)
+ break;
+ }
+ if (c != ZIO_COMPRESS_FUNCTIONS)
+ break;
+ lsize -= SPA_MINBLOCKSIZE;
+ }
+
+ umem_free(pbuf2, SPA_MAXBLOCKSIZE);
+ umem_free(lbuf2, SPA_MAXBLOCKSIZE);
+
+ if (lsize <= psize) {
+ (void) printf("Decompress of %s failed\n", thing);
+ goto out;
+ }
+ buf = lbuf;
+ size = lsize;
+ } else {
+ buf = pbuf;
+ size = psize;
+ }
+
if (flags & ZDB_FLAG_PRINT_BLKPTR)
zdb_print_blkptr((blkptr_t *)(void *)
((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
@@ -2177,134 +2838,92 @@ zdb_read_block(char *thing, spa_t **spap)
zdb_dump_block(thing, buf, size, flags);
out:
- umem_free(buf, size);
+ umem_free(pbuf, SPA_MAXBLOCKSIZE);
+ umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
}
static boolean_t
-nvlist_string_match(nvlist_t *config, char *name, char *tgt)
+pool_match(nvlist_t *cfg, char *tgt)
{
+ uint64_t v, guid = strtoull(tgt, NULL, 0);
char *s;
- if (nvlist_lookup_string(config, name, &s) != 0)
- return (B_FALSE);
-
- return (strcmp(s, tgt) == 0);
-}
-
-static boolean_t
-nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt)
-{
- uint64_t val;
-
- if (nvlist_lookup_uint64(config, name, &val) != 0)
- return (B_FALSE);
-
- return (val == tgt);
-}
-
-static boolean_t
-vdev_child_guid_match(nvlist_t *vdev, uint64_t guid)
-{
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- for (c = 0; c < children; ++c)
- if (nvlist_uint64_match(child[c], ZPOOL_CONFIG_GUID, guid))
- return (B_TRUE);
- return (B_FALSE);
-}
-
-static boolean_t
-vdev_child_string_match(nvlist_t *vdev, char *tgt)
-{
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- for (c = 0; c < children; ++c) {
- if (nvlist_string_match(child[c], ZPOOL_CONFIG_PATH, tgt) ||
- nvlist_string_match(child[c], ZPOOL_CONFIG_DEVID, tgt))
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
-static boolean_t
-vdev_guid_match(nvlist_t *config, uint64_t guid)
-{
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- return (nvlist_uint64_match(nvroot, ZPOOL_CONFIG_GUID, guid) ||
- vdev_child_guid_match(nvroot, guid));
-}
-
-static boolean_t
-vdev_string_match(nvlist_t *config, char *tgt)
-{
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- return (vdev_child_string_match(nvroot, tgt));
-}
-
-static boolean_t
-pool_match(nvlist_t *config, char *tgt)
-{
- uint64_t guid = strtoull(tgt, NULL, 0);
-
if (guid != 0) {
- return (
- nvlist_uint64_match(config, ZPOOL_CONFIG_POOL_GUID, guid) ||
- vdev_guid_match(config, guid));
+ if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
+ return (v == guid);
} else {
- return (
- nvlist_string_match(config, ZPOOL_CONFIG_POOL_NAME, tgt) ||
- vdev_string_match(config, tgt));
+ if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
+ return (strcmp(s, tgt) == 0);
}
+ return (B_FALSE);
}
-static int
-find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir)
+static char *
+find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
{
nvlist_t *pools;
- int error = ENOENT;
nvlist_t *match = NULL;
+ char *name = NULL;
+ char *sepp = NULL;
+ char sep;
+ int count = 0;
+ importargs_t args = { 0 };
- if (vdev_dir != NULL)
- pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir);
- else
- pools = zpool_find_import_activeok(g_zfs, 0, NULL);
+ args.paths = dirc;
+ args.path = dirv;
+ args.can_be_active = B_TRUE;
+
+ if ((sepp = strpbrk(*target, "/@")) != NULL) {
+ sep = *sepp;
+ *sepp = '\0';
+ }
+
+ pools = zpool_search_import(g_zfs, &args);
if (pools != NULL) {
nvpair_t *elem = NULL;
-
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
verify(nvpair_value_nvlist(elem, configp) == 0);
- if (pool_match(*configp, pool_id)) {
+ if (pool_match(*configp, *target)) {
+ count++;
if (match != NULL) {
- (void) fatal(
- "More than one matching pool - "
- "specify guid/devid/device path.");
+ /* print previously found config */
+ if (name != NULL) {
+ (void) printf("%s\n", name);
+ dump_nvlist(match, 8);
+ name = NULL;
+ }
+ (void) printf("%s\n",
+ nvpair_name(elem));
+ dump_nvlist(*configp, 8);
} else {
match = *configp;
- error = 0;
+ name = nvpair_name(elem);
}
}
}
}
+ if (count > 1)
+ (void) fatal("\tMatched %d pools - use pool GUID "
+ "instead of pool name or \n"
+ "\tpool name part of a dataset name to select pool", count);
- *configp = error ? NULL : match;
+ if (sepp)
+ *sepp = sep;
+ /*
+ * If pool GUID was specified for pool id, replace it with pool name
+ */
+ if (name && (strstr(*target, name) != *target)) {
+ int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
+
+ *target = umem_alloc(sz, UMEM_NOFAIL);
+ (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
+ }
- return (error);
+ *configp = name ? match : NULL;
+
+ return (name);
}
int
@@ -2312,83 +2931,85 @@ main(int argc, char **argv)
{
int i, c;
struct rlimit rl = { 1024, 1024 };
- spa_t *spa;
+ spa_t *spa = NULL;
objset_t *os = NULL;
- char *endstr;
int dump_all = 1;
int verbose = 0;
- int error;
- int exported = 0;
- char *vdev_dir = NULL;
+ int error = 0;
+ char **searchdirs = NULL;
+ int nsearch = 0;
+ char *target;
+ nvlist_t *policy = NULL;
+ uint64_t max_txg = UINT64_MAX;
+ int rewind = ZPOOL_NEVER_REWIND;
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) {
+ while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
- case 'u':
- case 'd':
- case 'i':
case 'b':
case 'c':
+ case 'd':
+ case 'h':
+ case 'i':
+ case 'l':
case 'm':
case 's':
+ case 'u':
case 'C':
- case 'l':
+ case 'D':
case 'R':
+ case 'S':
dump_opt[c]++;
dump_all = 0;
break;
+ case 'A':
+ case 'F':
case 'L':
+ case 'X':
+ case 'e':
+ case 'P':
dump_opt[c]++;
break;
case 'v':
verbose++;
break;
- case 'U':
- spa_config_path = optarg;
- break;
- case 'e':
- exported = 1;
- break;
case 'p':
- vdev_dir = optarg;
- break;
- case 'S':
- dump_opt[c]++;
- dump_all = 0;
- zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
- if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
- usage();
- endstr = strchr(optarg, ':') + 1;
- if (strcmp(endstr, "fletcher2") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
- else if (strcmp(endstr, "fletcher4") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
- else if (strcmp(endstr, "sha256") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
- else if (strcmp(endstr, "all") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
- else
- usage();
+ if (searchdirs == NULL) {
+ searchdirs = umem_alloc(sizeof (char *),
+ UMEM_NOFAIL);
+ } else {
+ char **tmp = umem_alloc((nsearch + 1) *
+ sizeof (char *), UMEM_NOFAIL);
+ bcopy(searchdirs, tmp, nsearch *
+ sizeof (char *));
+ umem_free(searchdirs,
+ nsearch * sizeof (char *));
+ searchdirs = tmp;
+ }
+ searchdirs[nsearch++] = optarg;
break;
case 't':
- ub_max_txg = strtoull(optarg, NULL, 0);
- if (ub_max_txg < TXG_INITIAL) {
+ max_txg = strtoull(optarg, NULL, 0);
+ if (max_txg < TXG_INITIAL) {
(void) fprintf(stderr, "incorrect txg "
"specified: %s\n", optarg);
usage();
}
break;
+ case 'U':
+ spa_config_path = optarg;
+ break;
default:
usage();
break;
}
}
- if (vdev_dir != NULL && exported == 0) {
+ if (!dump_opt['e'] && searchdirs != NULL) {
(void) fprintf(stderr, "-p option requires use of -e\n");
usage();
}
@@ -2397,18 +3018,26 @@ main(int argc, char **argv)
g_zfs = libzfs_init();
ASSERT(g_zfs != NULL);
+ if (dump_all)
+ verbose = MAX(verbose, 1);
+
for (c = 0; c < 256; c++) {
- if (dump_all && c != 'l' && c != 'R')
+ if (dump_all && !strchr("elAFLRSXP", c))
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
}
+ aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
+ zfs_recover = (dump_opt['A'] > 1);
+
argc -= optind;
argv += optind;
+ if (argc < 2 && dump_opt['R'])
+ usage();
if (argc < 1) {
- if (dump_opt['C']) {
+ if (!dump_opt['e'] && dump_opt['C']) {
dump_cachefile(spa_config_path);
return (0);
}
@@ -2420,99 +3049,107 @@ main(int argc, char **argv)
return (0);
}
- if (dump_opt['R']) {
- flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
- flagbits['c'] = ZDB_FLAG_CHECKSUM;
- flagbits['d'] = ZDB_FLAG_DECOMPRESS;
- flagbits['e'] = ZDB_FLAG_BSWAP;
- flagbits['g'] = ZDB_FLAG_GBH;
- flagbits['i'] = ZDB_FLAG_INDIRECT;
- flagbits['p'] = ZDB_FLAG_PHYS;
- flagbits['r'] = ZDB_FLAG_RAW;
-
- spa = NULL;
- while (argv[0]) {
- zdb_read_block(argv[0], &spa);
- argv++;
- argc--;
- }
- if (spa)
- spa_close(spa, (void *)zdb_read_block);
- return (0);
- }
+ if (dump_opt['X'] || dump_opt['F'])
+ rewind = ZPOOL_DO_REWIND |
+ (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
- if (dump_opt['C'])
- dump_config(argv[0]);
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
+ nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
+ fatal("internal error: %s", strerror(ENOMEM));
error = 0;
- if (exported) {
- /*
- * Check to see if the name refers to an exported zpool
- */
- char *slash;
- nvlist_t *exported_conf = NULL;
-
- if ((slash = strchr(argv[0], '/')) != NULL)
- *slash = '\0';
-
- error = find_exported_zpool(argv[0], &exported_conf, vdev_dir);
- if (error == 0) {
- nvlist_t *nvl = NULL;
-
- if (vdev_dir != NULL) {
- if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
- error = ENOMEM;
- else if (nvlist_add_string(nvl,
- zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
- vdev_dir) != 0)
- error = ENOMEM;
- }
+ target = argv[0];
- if (error == 0)
- error = spa_import_verbatim(argv[0],
- exported_conf, nvl);
+ if (dump_opt['e']) {
+ nvlist_t *cfg = NULL;
+ char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
- nvlist_free(nvl);
+ error = ENOENT;
+ if (name) {
+ if (dump_opt['C'] > 1) {
+ (void) printf("\nConfiguration for import:\n");
+ dump_nvlist(cfg, 8);
+ }
+ if (nvlist_add_nvlist(cfg,
+ ZPOOL_REWIND_POLICY, policy) != 0) {
+ fatal("can't open '%s': %s",
+ target, strerror(ENOMEM));
+ }
+ if ((error = spa_import(name, cfg, NULL,
+ ZFS_IMPORT_MISSING_LOG)) != 0) {
+ error = spa_import(name, cfg, NULL,
+ ZFS_IMPORT_VERBATIM);
+ }
}
-
- if (slash != NULL)
- *slash = '/';
}
if (error == 0) {
- if (strchr(argv[0], '/') != NULL) {
- error = dmu_objset_open(argv[0], DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os);
+ if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
+ error = spa_open_rewind(target, &spa, FTAG, policy,
+ NULL);
+ if (error) {
+ /*
+ * If we're missing the log device then
+ * try opening the pool after clearing the
+ * log state.
+ */
+ mutex_enter(&spa_namespace_lock);
+ if ((spa = spa_lookup(target)) != NULL &&
+ spa->spa_log_state == SPA_LOG_MISSING) {
+ spa->spa_log_state = SPA_LOG_CLEAR;
+ error = 0;
+ }
+ mutex_exit(&spa_namespace_lock);
+
+ if (!error) {
+ error = spa_open_rewind(target, &spa,
+ FTAG, policy, NULL);
+ }
+ }
} else {
- error = spa_open(argv[0], &spa, FTAG);
+ error = dmu_objset_own(target, DMU_OST_ANY,
+ B_TRUE, FTAG, &os);
}
}
+ nvlist_free(policy);
if (error)
- fatal("can't open %s: %s", argv[0], strerror(error));
+ fatal("can't open '%s': %s", target, strerror(error));
argv++;
- if (--argc > 0) {
- zopt_objects = argc;
- zopt_object = calloc(zopt_objects, sizeof (uint64_t));
- for (i = 0; i < zopt_objects; i++) {
- errno = 0;
- zopt_object[i] = strtoull(argv[i], NULL, 0);
- if (zopt_object[i] == 0 && errno != 0)
- fatal("bad object number %s: %s",
- argv[i], strerror(errno));
+ argc--;
+ if (!dump_opt['R']) {
+ if (argc > 0) {
+ zopt_objects = argc;
+ zopt_object = calloc(zopt_objects, sizeof (uint64_t));
+ for (i = 0; i < zopt_objects; i++) {
+ errno = 0;
+ zopt_object[i] = strtoull(argv[i], NULL, 0);
+ if (zopt_object[i] == 0 && errno != 0)
+ fatal("bad number %s: %s",
+ argv[i], strerror(errno));
+ }
}
- }
-
- if (os != NULL) {
- dump_dir(os);
- dmu_objset_close(os);
+ (os != NULL) ? dump_dir(os) : dump_zpool(spa);
} else {
- dump_zpool(spa);
- spa_close(spa, FTAG);
+ flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
+ flagbits['c'] = ZDB_FLAG_CHECKSUM;
+ flagbits['d'] = ZDB_FLAG_DECOMPRESS;
+ flagbits['e'] = ZDB_FLAG_BSWAP;
+ flagbits['g'] = ZDB_FLAG_GBH;
+ flagbits['i'] = ZDB_FLAG_INDIRECT;
+ flagbits['p'] = ZDB_FLAG_PHYS;
+ flagbits['r'] = ZDB_FLAG_RAW;
+
+ for (i = 0; i < argc; i++)
+ zdb_read_block(argv[i], spa);
}
+ (os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
+
fuid_table_destroy();
+ sa_loaded = B_FALSE;
libzfs_fini(g_zfs);
kernel_fini();
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
index 1b3c18fab1c2..a0ed985f52b7 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -40,12 +40,14 @@
extern uint8_t dump_opt[256];
+static char prefix[4] = "\t\t\t";
+
static void
print_log_bp(const blkptr_t *bp, const char *prefix)
{
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+ sprintf_blkptr(blkbuf, bp);
(void) printf("%s%s\n", prefix, blkbuf);
}
@@ -54,19 +56,29 @@ static void
zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
{
time_t crtime = lr->lr_crtime[0];
- char *name = (char *)(lr + 1);
- char *link = name + strlen(name) + 1;
+ char *name, *link;
+ lr_attr_t *lrattr;
- if (txtype == TX_SYMLINK)
- (void) printf("\t\t\t%s -> %s\n", name, link);
- else
- (void) printf("\t\t\t%s\n", name);
+ name = (char *)(lr + 1);
+
+ if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR ||
+ lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) {
+ lrattr = (lr_attr_t *)(lr + 1);
+ name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
+ }
+
+ if (txtype == TX_SYMLINK) {
+ link = name + strlen(name) + 1;
+ (void) printf("%s%s -> %s\n", prefix, name, link);
+ } else if (txtype != TX_MKXATTR) {
+ (void) printf("%s%s\n", prefix, name);
+ }
- (void) printf("\t\t\t%s", ctime(&crtime));
- (void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
+ (void) printf("%s%s", prefix, ctime(&crtime));
+ (void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
(longlong_t)lr->lr_mode);
- (void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
+ (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
(u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
(u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
}
@@ -75,7 +87,7 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
static void
zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
{
- (void) printf("\t\t\tdoid %llu, name %s\n",
+ (void) printf("%sdoid %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (char *)(lr + 1));
}
@@ -83,7 +95,7 @@ zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
static void
zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
{
- (void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
+ (void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
(char *)(lr + 1));
}
@@ -95,9 +107,9 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
char *snm = (char *)(lr + 1);
char *tnm = snm + strlen(snm) + 1;
- (void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
+ (void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
- (void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
+ (void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
}
/* ARGSUSED */
@@ -106,44 +118,48 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
char *data, *dlimit;
blkptr_t *bp = &lr->lr_blkptr;
+ zbookmark_t zb;
char buf[SPA_MAXBLOCKSIZE];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;
- (void) printf("\t\t\tfoid %llu, offset 0x%llx,"
- " length 0x%llx, blkoff 0x%llx\n",
- (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
- (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
+ (void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
+ (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
+ (u_longlong_t)lr->lr_length);
if (txtype == TX_WRITE2 || verbose < 5)
return;
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
- (void) printf("\t\t\thas blkptr, %s\n",
+ (void) printf("%shas blkptr, %s\n", prefix,
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
- print_log_bp(bp, "\t\t\t");
+ print_log_bp(bp, prefix);
+
if (BP_IS_HOLE(bp)) {
(void) printf("\t\t\tLSIZE 0x%llx\n",
(u_longlong_t)BP_GET_LSIZE(bp));
}
if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
- } else {
- zbookmark_t zb;
-
- zb.zb_objset = dmu_objset_id(zilog->zl_os);
- zb.zb_object = lr->lr_foid;
- zb.zb_level = 0;
- zb.zb_blkid = -1; /* unknown */
-
- error = zio_wait(zio_read(NULL, zilog->zl_spa,
- bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
- if (error)
- return;
+ (void) printf("%s<hole>\n", prefix);
+ return;
+ }
+ if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
+ (void) printf("%s<block already committed>\n", prefix);
+ return;
}
- data = buf + lr->lr_blkoff;
+
+ SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
+ lr->lr_foid, ZB_ZIL_LEVEL,
+ lr->lr_offset / BP_GET_LSIZE(bp));
+
+ error = zio_wait(zio_read(NULL, zilog->zl_spa,
+ bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
+ ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
+ if (error)
+ return;
+ data = buf;
} else {
data = (char *)(lr + 1);
}
@@ -151,7 +167,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
dlimit = data + MIN(lr->lr_length,
(verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
- (void) printf("\t\t\t");
+ (void) printf("%s", prefix);
while (data < dlimit) {
if (isprint(*data))
(void) printf("%c ", *data);
@@ -166,7 +182,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
static void
zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
{
- (void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
+ (void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length);
}
@@ -178,38 +194,38 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
time_t atime = (time_t)lr->lr_atime[0];
time_t mtime = (time_t)lr->lr_mtime[0];
- (void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
+ (void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
if (lr->lr_mask & AT_MODE) {
- (void) printf("\t\t\tAT_MODE %llo\n",
+ (void) printf("%sAT_MODE %llo\n", prefix,
(longlong_t)lr->lr_mode);
}
if (lr->lr_mask & AT_UID) {
- (void) printf("\t\t\tAT_UID %llu\n",
+ (void) printf("%sAT_UID %llu\n", prefix,
(u_longlong_t)lr->lr_uid);
}
if (lr->lr_mask & AT_GID) {
- (void) printf("\t\t\tAT_GID %llu\n",
+ (void) printf("%sAT_GID %llu\n", prefix,
(u_longlong_t)lr->lr_gid);
}
if (lr->lr_mask & AT_SIZE) {
- (void) printf("\t\t\tAT_SIZE %llu\n",
+ (void) printf("%sAT_SIZE %llu\n", prefix,
(u_longlong_t)lr->lr_size);
}
if (lr->lr_mask & AT_ATIME) {
- (void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
+ (void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_atime[0],
(u_longlong_t)lr->lr_atime[1],
ctime(&atime));
}
if (lr->lr_mask & AT_MTIME) {
- (void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
+ (void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_mtime[0],
(u_longlong_t)lr->lr_mtime[1],
ctime(&mtime));
@@ -220,7 +236,7 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
{
- (void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
+ (void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
}
@@ -256,7 +272,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
};
/* ARGSUSED */
-static void
+static int
print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
{
int txtype;
@@ -280,23 +296,24 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
zil_rec_info[txtype].zri_count++;
zil_rec_info[0].zri_count++;
+
+ return (0);
}
/* ARGSUSED */
-static void
+static int
print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
- char blkbuf[BP_SPRINTF_LEN];
+ char blkbuf[BP_SPRINTF_LEN + 10];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
char *claim;
if (verbose <= 3)
- return;
+ return (0);
if (verbose >= 5) {
(void) strcpy(blkbuf, ", ");
- sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), bp);
+ sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
} else {
blkbuf[0] = '\0';
}
@@ -310,6 +327,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
(void) printf("\tBlock seqno %llu, %s%s\n",
(u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
+
+ return (0);
}
static void
@@ -342,17 +361,17 @@ dump_intent_log(zilog_t *zilog)
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int i;
- if (zh->zh_log.blk_birth == 0 || verbose < 2)
+ if (zh->zh_log.blk_birth == 0 || verbose < 1)
return;
- (void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
- (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
+ (void) printf("\n ZIL header: claim_txg %llu, "
+ "claim_blk_seq %llu, claim_lr_seq %llu",
+ (u_longlong_t)zh->zh_claim_txg,
+ (u_longlong_t)zh->zh_claim_blk_seq,
+ (u_longlong_t)zh->zh_claim_lr_seq);
(void) printf(" replay_seq %llu, flags 0x%llx\n",
(u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
- if (verbose >= 4)
- print_log_bp(&zh->zh_log, "\n\tfirst block: ");
-
for (i = 0; i < TX_MAX_TYPE; i++)
zil_rec_info[i].zri_count = 0;
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
index 0d97026a4a43..0d40a9083c7e 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
@@ -6,7 +6,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH zfs 1M "5 May 2009" "SunOS 5.11" "System Administration Commands"
+.TH zfs 1M "24 Sep 2009" "SunOS 5.11" "System Administration Commands"
.SH NAME
zfs \- configures ZFS file systems
.SH SYNOPSIS
@@ -27,7 +27,12 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBdestroy\fR [\fB-rRd\fR] \fIsnapshot\fR
.fi
.LP
@@ -75,7 +80,7 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|snapshot ...
+\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
.fi
.LP
@@ -174,7 +179,7 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBallow\fR \fB-s\fR @setname \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
+\fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
@@ -195,7 +200,29 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @setname [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR
+\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...
+.fi
+
+\fBzfs\fR \fBjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
+.fi
+.LP
+.nf
+\fBzfs\fR \fBunjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
.fi
.SH DESCRIPTION
@@ -212,7 +239,7 @@ pool/{filesystem,volume,snapshot}
.sp
.LP
-\&...where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
+where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
.sp
.LP
A dataset can be one of the following:
@@ -224,7 +251,7 @@ A dataset can be one of the following:
.ad
.sp .6
.RS 4n
-A \fBZFS\fR dataset of type "filesystem" that can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space.
+A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space.
.RE
.sp
@@ -268,17 +295,17 @@ A snapshot is a read-only copy of a file system or volume. Snapshots can be crea
Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently.
.sp
.LP
-File system snapshots can be accessed under the ".zfs/snapshot" directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the ".zfs" directory can be controlled by the "snapdir" property.
+File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property.
.SS "Clones"
.sp
.LP
A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space.
.sp
.LP
-Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The "origin" property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
+Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
.sp
.LP
-The clone parent-child dependency relationship can be reversed by using the "\fBpromote\fR" subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from.
+The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from.
.SS "Mount Points"
.sp
.LP
@@ -304,10 +331,10 @@ A \fBZFS\fR file system can be added to a non-global zone by using the \fBzonecf
The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted.
.sp
.LP
-A dataset can also be delegated to a non-global zone by using \fBzonecfg\fR \fBadd dataset\fR subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the \fBquota\fR property is controlled by the global administrator.
+A dataset can also be delegated to a non-global zone by using the \fBzonecfg\fR \fBadd dataset\fR subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the \fBquota\fR property is controlled by the global administrator.
.sp
.LP
-A \fBZFS\fR volume can be added as a device to a non-global zone by using \fBzonecfg\fR \fBadd device\fR subcommand. However, its physical properties can be modified only by the global administrator.
+A \fBZFS\fR volume can be added as a device to a non-global zone by using the \fBzonecfg\fR \fBadd device\fR subcommand. However, its physical properties can be modified only by the global administrator.
.sp
.LP
For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M).
@@ -320,7 +347,7 @@ The global administrator can forcibly clear the \fBzoned\fR property, though thi
.SS "Native Properties"
.sp
.LP
-Properties are divided into two types, native and user-defined (or "user"). Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below.
+Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below.
.sp
.LP
Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots).
@@ -380,6 +407,17 @@ The time this dataset was created.
.ne 2
.mk
.na
+\fB\fBdefer_destroy\fR\fR
+.ad
+.sp .6
+.RS 4n
+This property is \fBon\fR if the snapshot has been marked for deferred destroy by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBmounted\fR\fR
.ad
.sp .6
@@ -489,7 +527,7 @@ The amount of space used by a \fBrefreservation\fR set on this dataset, which wo
.ad
.sp .6
.RS 4n
-The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots
+The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots.
.RE
.sp
@@ -500,34 +538,34 @@ The amount of space consumed by snapshots of this dataset. In particular, it is
.ad
.sp .6
.RS 4n
-The amount of space referenced in this dataset by the specified user. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information.
+The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information.
.sp
Unprivileged users can access only their own space usage. The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage.
.sp
-This property cannot be set on volumes, or on pools before version 15. The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
+The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix name\fR (for example, \fBjoe\fR)
+\fIPOSIX name\fR (for example, \fBjoe\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix numeric id\fR (for example, \fB789\fR)
+\fIPOSIX numeric ID\fR (for example, \fB789\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid name\fR (for example, \fBjoe.smith@mydomain\fR)
+\fISID name\fR (for example, \fBjoe.smith@mydomain\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid numeric id\fR (for example, \fBS-1-123-456-789\fR)
+\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR)
.RE
.RE
@@ -535,13 +573,24 @@ This property cannot be set on volumes, or on pools before version 15. The \fBus
.ne 2
.mk
.na
+\fB\fBuserrefs\fR\fR
+.ad
+.sp .6
+.RS 4n
+This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBgroupused@\fR\fIgroup\fR\fR
.ad
.sp .6
.RS 4n
-The amount of space referenced in this dataset by the specified group. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information.
+The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information.
.sp
-Unprivileged users can only access the \fBgroupused@\fR... property for groups that they are a member of. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage.
+Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage.
.RE
.sp
@@ -618,7 +667,9 @@ This property is not inherited.
.ad
.sp .6
.RS 4n
-Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher2\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice.
+Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice.
+.sp
+Changing this property affects only newly-written data.
.RE
.sp
@@ -629,22 +680,22 @@ Controls the checksum used to verify data integrity. The default value is \fBon\
.ad
.sp .6
.RS 4n
-Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip" compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the "gzip" level by using the value "gzip-\fIN\fR" where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
+Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to \fBon\fR uses the \fBlzjb\fR compression algorithm. The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR (which is also the default for \fBgzip\fR(1)).
.sp
-This property can also be referred to by its shortened column name "compress".
+This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data.
.RE
.sp
.ne 2
.mk
.na
-\fBcopies=\fB1\fR | \fB2\fR | \fB3\fR\fR
+\fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR
.ad
.sp .6
.RS 4n
-Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or \fBraid-z\fR. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations.
+Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations.
.sp
-Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR option.
+Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option.
.RE
.sp
@@ -725,36 +776,36 @@ Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implic
.ad
.sp .6
.RS 4n
-Limits the amount of space referenced by the specified user, which is specified by the \fBuserspace@\fR\fIuser\fR property.
+Limits the amount of space consumed by the specified user. User space consumption is identified by the \fBuserspace@\fR\fIuser\fR property.
.sp
-Enforcement of user quotas may be delayed by several seconds. In other words, users may go a bit over their quota before the system notices that they are over quota and begins to refuse additional writes with \fBEDQUOT\fR. See the \fBzfs userspace\fR subcommand for more information.
+Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message . See the \fBzfs userspace\fR subcommand for more information.
.sp
-Unprivileged users can get only their own quota. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota.
+Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota.
.sp
-This property cannot be set on volumes, on filesystems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
+This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix name\fR (for example, \fBjoe\fR)
+\fIPOSIX name\fR (for example, \fBjoe\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix numeric id\fR (for example, \fB789\fR)
+\fIPOSIX numeric ID\fR (for example, \fB789\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid name\fR (for example, \fBjoe.smith@mydomain\fR)
+\fISID name\fR (for example, \fBjoe.smith@mydomain\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid numeric id\fR (for example, \fBS-1-123-456-789\fR)
+\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR)
.RE
.RE
@@ -766,9 +817,9 @@ This property cannot be set on volumes, on filesystems before version 4, or on p
.ad
.sp .6
.RS 4n
-Limits the amount of space referenced by the specified group. See the \fBuserquota@\fR\fIuser\fR property for more information.
+Limits the amount of space consumed by the specified group. Group space consumption is identified by the \fBuserquota@\fR\fIuser\fR property.
.sp
-Unprivileged users can only get the quota of groups they are a member of. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas.
+Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas.
.RE
.sp
@@ -904,7 +955,18 @@ When the \fBsharesmb\fR property is changed for a dataset, the dataset and any c
.RS 4n
Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a \fBsharenfs\fR property of \fBoff\fR is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
.sp
-When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared.
+When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously \fBoff\fR, or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBlogbias\fR = \fBlatency\fR | \fBthroughput\fR\fR
+.ad
+.sp .6
+.RS 4n
+Provide a hint to ZFS about handling of synchronous requests in this dataset. If \fBlogbias\fR is set to \fBlatency\fR (the default), ZFS will use pool log devices (if configured) to handle the requests at low latency. If \fBlogbias\fR is set to \fBthroughput\fR, ZFS will not use configured pool log devices. ZFS will instead optimize synchronous operations for global pool throughput and efficient use of resources.
.RE
.sp
@@ -959,7 +1021,7 @@ Controls whether regular files should be scanned for viruses when a file is open
.ne 2
.mk
.na
-\fBxattr=\fBon\fR | \fBoff\fR\fR
+\fB\fBxattr\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
@@ -997,7 +1059,7 @@ The \fBmixed\fR value for the \fBcasesensitivity\fR property indicates that the
.ne 2
.mk
.na
-\fB\fBnormalization\fR=\fBnone\fR | \fBformD\fR | \fBformKCf\fR\fR
+\fB\fBnormalization\fR = \fBnone\fR | \fBformC\fR | \fBformD\fR | \fBformKC\fR | \fBformKD\fR\fR
.ad
.sp .6
.RS 4n
@@ -1008,6 +1070,17 @@ Indicates whether the file system should perform a \fBunicode\fR normalization o
.ne 2
.mk
.na
+\fBjailed =\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the dataset is managed from within a jail. The default value is "off".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fButf8only\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
@@ -1081,7 +1154,7 @@ Displays a help message.
.ad
.sp .6
.RS 4n
-Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the "mountpoint" property inherited from the parent.
+Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the \fBmountpoint\fR property inherited from the parent.
.sp
.ne 2
.mk
@@ -1101,7 +1174,7 @@ Creates all the non-existing parent datasets. Datasets created in this manner ar
.ad
.sp .6
.RS 4n
-Sets the specified property as if the command \fBzfs set \fIproperty\fR=\fIvalue\fR\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
+Sets the specified property as if the command \fBzfs set\fR \fIproperty\fR=\fIvalue\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
.RE
.RE
@@ -1114,7 +1187,7 @@ Sets the specified property as if the command \fBzfs set \fIproperty\fR=\fIvalue
.ad
.sp .6
.RS 4n
-Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fIpath\fR\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created.
+Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fR\fIpath\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created.
.sp
\fIsize\fR is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of \fIblocksize\fR.
.sp
@@ -1147,7 +1220,7 @@ Creates a sparse volume with no reservation. See \fBvolsize\fR in the Native Pro
.ad
.sp .6
.RS 4n
-Sets the specified property as if the \fBzfs set \fIproperty\fR=\fIvalue\fR\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
+Sets the specified property as if the \fBzfs set\fR \fIproperty\fR=\fIvalue\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
.RE
.sp
@@ -1158,7 +1231,7 @@ Sets the specified property as if the \fBzfs set \fIproperty\fR=\fIvalue\fR\fR c
.ad
.sp .6
.RS 4n
-Equivalent to \fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined.
+Equivalent to \fB-o\fR \fBvolblocksize\fR=\fIblocksize\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined.
.RE
.RE
@@ -1167,11 +1240,11 @@ Equivalent to \fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR. If this option
.ne 2
.mk
.na
-\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR\fR
.ad
.sp .6
.RS 4n
-Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children, snapshots, clones).
+Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children or clones).
.sp
.ne 2
.mk
@@ -1180,7 +1253,7 @@ Destroys the given dataset. By default, the command unshares any file systems th
.ad
.sp .6
.RS 4n
-Recursively destroy all children. If a snapshot is specified, destroy all snapshots with this name in descendent file systems.
+Recursively destroy all children.
.RE
.sp
@@ -1191,7 +1264,7 @@ Recursively destroy all children. If a snapshot is specified, destroy all snapsh
.ad
.sp .6
.RS 4n
-Recursively destroy all dependents, including cloned file systems outside the target hierarchy. If a snapshot is specified, destroy all snapshots with this name in descendent file systems.
+Recursively destroy all dependents, including cloned file systems outside the target hierarchy.
.RE
.sp
@@ -1212,6 +1285,52 @@ Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR o
.ne 2
.mk
.na
+\fB\fBzfs destroy\fR [\fB-rRd\fR] \fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+The given snapshot is destroyed immediately if and only if the \fBzfs destroy\fR command without the \fB-d\fR option would have destroyed it. Such immediate destruction would occur, for example, if the snapshot had no clones and the user-initiated reference count were zero.
+.sp
+If the snapshot does not qualify for immediate destruction, it is marked for deferred deletion. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-d\fR\fR
+.ad
+.sp .6
+.RS 4n
+Defer snapshot deletion.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Destroy (or mark for deferred deletion) all snapshots with this name in descendent file systems.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively destroy all dependents.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR
.ad
.sp .6
@@ -1251,6 +1370,8 @@ Sets the specified property; see \fBzfs create\fR for details.
.RS 4n
Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option.
.sp
+The \fB-rR\fR options do not recursively destroy the child snapshots of a recursive snapshot. Only the top-level recursive snapshot is destroyed by either of these options. To completely roll back a recursive snapshot, you must rollback the individual child snapshots.
+.sp
.ne 2
.mk
.na
@@ -1380,15 +1501,7 @@ Recursively rename the snapshots of all descendent datasets. Snapshots are the o
.ad
.sp .6
.RS 4n
-Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR) . The following fields are displayed:
-.sp
-.in +2
-.nf
-name,used,available,referenced,mountpoint
-.fi
-.in -2
-.sp
-
+Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR) . The following fields are displayed, \fBname,used,available,referenced,mountpoint\fR.
.sp
.ne 2
.mk
@@ -1397,7 +1510,7 @@ name,used,available,referenced,mountpoint
.ad
.sp .6
.RS 4n
-Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary whitespace.
+Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space.
.RE
.sp
@@ -1435,34 +1548,25 @@ A comma-separated list of properties to display. The property must be:
.TP
.ie t \(bu
.el o
-one of the properties described in the "Native Properties" section
+One of the properties described in the "Native Properties" section
.RE
.RS +4
.TP
.ie t \(bu
.el o
-a user property
+A user property
.RE
.RS +4
.TP
.ie t \(bu
.el o
-the value \fBname\fR to display the dataset name
+The value \fBname\fR to display the dataset name
.RE
.RS +4
.TP
.ie t \(bu
.el o
-the value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for:
-.sp
-.in +2
-.nf
--o name,avail,used,usedsnap,usedds,usedrefreserv,\e
-usedchild -t filesystem,volume
-.fi
-.in -2
-.sp
-
+The value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for specifying \fB-o name,avail,used,usedsnap,usedds,usedrefreserv,usedchild\fR \fB-t filesystem,volume\fR syntax.
.RE
.RE
@@ -1474,7 +1578,7 @@ usedchild -t filesystem,volume
.ad
.sp .6
.RS 4n
-A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
+A property for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
.sp
The following is a list of sorting criteria:
.RS +4
@@ -1535,7 +1639,7 @@ A comma-separated list of types to display, where \fItype\fR is one of \fBfilesy
.ad
.sp .6
.RS 4n
-Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). Properties cannot be set on snapshots.
+Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). User properties can be set on snapshots. For more information, see the "User Properties" section.
.RE
.sp
@@ -1671,11 +1775,11 @@ Displays a list of file systems that are not the most recent version.
.ad
.sp .6
.RS 4n
-Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems can not be accessed on systems running older versions of the software.
+Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems cannot be accessed on systems running older versions of the software.
.sp
-The file system version is independent of the pool version (see \fBzpool\fR(1M) for information on the \fBzpool upgrade\fR command).
+In general, the file system version is independent of the pool version. See \fBzpool\fR(1M) for information on the \fBzpool upgrade\fR command.
.sp
-The file system version does not have to be upgraded when the pool version is upgraded, and vice-versa.
+In some cases, the file system version and the pool version are interrelated and the pool version must be upgraded before the file system version can be upgraded.
.sp
.ne 2
.mk
@@ -1772,16 +1876,7 @@ Use exact (parseable) numeric output.
.ad
.sp .6
.RS 4n
-Display only the specified fields, from the following set:
-.sp
-.in +2
-.nf
-type,name,used,quota
-.fi
-.in -2
-.sp
-
-The default is to display all fields.
+Display only the specified fields from the following set, \fBtype,name,used,quota\fR.The default is to display all fields.
.RE
.sp
@@ -1792,15 +1887,7 @@ The default is to display all fields.
.ad
.sp .6
.RS 4n
-Sort output by this field. The \fIs\fR and \fIS\fR flags may be specified multiple times to sort first by one field, then by another. The default is:
-.sp
-.in +2
-.nf
--s type -s name
-.fi
-.in -2
-.sp
-
+Sort output by this field. The \fIs\fR and \fIS\fR flags may be specified multiple times to sort first by one field, then by another. The default is \fB-s type\fR \fB-s name\fR.
.RE
.sp
@@ -1822,25 +1909,11 @@ Sort by this field in reverse order. See \fB-s\fR.
.ad
.sp .6
.RS 4n
-Print only the specified types, from the following set:
-.sp
-.in +2
-.nf
-all,posixuser,smbuser,posixgroup,smbgroup
-.fi
-.in -2
+Print only the specified types from the following set, \fBall,posixuser,smbuser,posixgroup,smbgroup\fR.
.sp
-
-The default is:
+The default is \fB-t posixuser,smbuser\fR
.sp
-.in +2
-.nf
--t posixuser,smbuser
-.fi
-.in -2
-.sp
-
-\&...but can be changed to include group types.
+The default can be changed to include group types.
.RE
.sp
@@ -1851,7 +1924,7 @@ The default is:
.ad
.sp .6
.RS 4n
-Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before a SMB-to-POSIX name mapping was established. In such a case, some files are owned by the SMB entity and some by the POSIX entity. However, he \fB-i\fR option will report that the POSIX entity has the total usage and quota for both.
+Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before a SMB-to-POSIX name mapping was established. In such a case, some files are owned by the SMB entity and some by the POSIX entity. However, the \fB-i\fR option will report that the POSIX entity has the total usage and quota for both.
.RE
.RE
@@ -1864,11 +1937,11 @@ Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. N
.ad
.sp .6
.RS 4n
-Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are:
+Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are \fB-t posixgroup,smbgroup\fR.
.sp
.in +2
.nf
--t posixgroup,smbgroup
+-
.fi
.in -2
.sp
@@ -2119,7 +2192,7 @@ If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag
Print verbose information about the stream package generated.
.RE
-The format of the stream is evolving. No backwards compatibility is guaranteed. You may not be able to receive your streams on future versions of \fBZFS\fR.
+The format of the stream is committed. You will be able to receive your streams on future versions of \fBZFS\fR.
.RE
.sp
@@ -2138,6 +2211,8 @@ Creates a snapshot whose contents are as specified in the stream provided on sta
.sp
If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and recreated, which means the \fBzvol\fR cannot be accessed during the \fBreceive\fR operation.
.sp
+When a snapshot replication package stream that is generated by using the \fBzfs send\fR \fB-R\fR command is received, any snapshots that do not exist on the sending location are destroyed by using the \fBzfs destroy\fR \fB-d\fR command.
+.sp
The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option.
.sp
If the argument is a snapshot name, the specified \fIsnapshot\fR is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified \fIfilesystem\fR or \fIvolume\fR. If the \fB-d\fR option is specified, the snapshot name is determined by appending the sent snapshot's name to the specified \fIfilesystem\fR. If the \fB-d\fR option is specified, any required file systems within the specified one are created.
@@ -2241,7 +2316,7 @@ Specifies to whom the permissions are delegated. Multiple entities can be specif
.ad
.sp .6
.RS 4n
-Specifies that the permissions be delegated to "everyone." Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which begin with an "at sign" ("@") , may be specified. See the \fB-s\fR form below for details.
+Specifies that the permissions be delegated to "everyone." Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which begin with an at sign (\fB@\fR) , may be specified. See the \fB-s\fR form below for details.
.RE
.sp
@@ -2263,67 +2338,63 @@ Permissions are generally the ability to use a \fBZFS\fR subcommand or change a
.sp
.in +2
.nf
-NAME TYPE NOTES
-allow subcommand Must also have the permission
- that is being allowed.
-clone subcommand Must also have the 'create' ability
- and the 'mount' ability in the origin
- file system.
-create subcommand Must also have the 'mount' ability.
-destroy subcommand Must also have the 'mount' ability.
-mount subcommand Allows mount, unmount, and
- create/remove zvol device links.
-promote subcommand Must also have the 'mount' ability and
- 'promote' ability in the origin file system.
-receive subcommand Must also have the 'mount' ability and
- the 'create' ability.
-rename subcommand Must also have the 'mount' ability and
- the 'create' ability in the new parent.
-rollback subcommand Must also have the 'mount' ability.
-snapshot subcommand Must also have the 'mount' ability.
-share subcommand Allows share and unshare.
-send subcommand
-
-
-aclinherit property
-aclmode property
-atime property
-canmount property
-casesensitivity property
-checksum property
-compression property
-copies property
-devices property
-exec property
-groupquota other Allows accessing any groupquota@... property.
-groupused other Allows reading any groupused@... property.
-mountpoint property
-nbmand property
-normalization property
-primarycache property
-quota property
-readonly property
-recordsize property
-refquota property
-refreservation property
-reservation property
-secondarycache property
-setuid property
-shareiscsi property
-sharenfs property
-sharesmb property
-snapdir property
-utf8only property
-userprop other Allows changing any user property.
-userquota other Allows accessing any userquota@... property.
-userused other Allows reading any userused@... property.
-version property
-volblocksize property
-volsize property
-vscan property
-xattr property
-zoned property
-userprop other Allows changing any user property.
+NAME TYPE NOTES
+allow subcommand Must also have the permission that is being
+ allowed
+clone subcommand Must also have the 'create' ability and 'mount'
+ ability in the origin file system
+create subcommand Must also have the 'mount' ability
+destroy subcommand Must also have the 'mount' ability
+mount subcommand Allows mount/umount of ZFS datasets
+promote subcommand Must also have the 'mount'
+ and 'promote' ability in the origin file system
+receive subcommand Must also have the 'mount' and 'create' ability
+rename subcommand Must also have the 'mount' and 'create'
+ ability in the new parent
+rollback subcommand Must also have the 'mount' ability
+send subcommand
+share subcommand Allows sharing file systems over NFS or SMB
+ protocols
+snapshot subcommand Must also have the 'mount' ability
+groupquota other Allows accessing any groupquota@... property
+groupused other Allows reading any groupused@... property
+userprop other Allows changing any user property
+userquota other Allows accessing any userquota@... property
+userused other Allows reading any userused@... property
+
+aclinherit property
+aclmode property
+atime property
+canmount property
+casesensitivity property
+checksum property
+compression property
+copies property
+devices property
+exec property
+mountpoint property
+nbmand property
+normalization property
+primarycache property
+quota property
+readonly property
+recordsize property
+refquota property
+refreservation property
+reservation property
+secondarycache property
+setuid property
+shareiscsi property
+sharenfs property
+sharesmb property
+snapdir property
+utf8only property
+version property
+volblocksize property
+volsize property
+vscan property
+xattr property
+zoned property
.fi
.in -2
.sp
@@ -2343,7 +2414,7 @@ Sets "create time" permissions. These permissions are granted (locally) to the c
.ne 2
.mk
.na
-\fB\fBzfs allow\fR \fB-s\fR @setname \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+\fB\fBzfs allow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
.ad
.sp .6
.RS 4n
@@ -2388,7 +2459,7 @@ Recursively remove the permissions from this file system and all descendents.
.ne 2
.mk
.na
-\fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @setname [\fIperm\fR|@\fIsetname\fR[,...]]\fR
+\fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,...]]\fR
.ad
.br
.na
@@ -2399,12 +2470,101 @@ Recursively remove the permissions from this file system and all descendents.
Removes permissions from a permission set. If no permissions are specified, then all permissions are removed, thus removing the set entirely.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs hold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR
+.ad
+.sp .6
+.RS 4n
+Adds a single reference, named with the \fItag\fR argument, to the specified snapshot or snapshots. Each snapshot has its own tag namespace, and tags must be unique within that space.
+.sp
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies that a hold with the given tag is applied recursively to the snapshots of all descendent file systems.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs holds\fR [\fB-r\fR] \fIsnapshot\fR...\fR
+.ad
+.sp .6
+.RS 4n
+Lists all existing user references for the given snapshot or snapshots.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Lists the holds that are set on the named descendent snapshots, in addition to listing the holds on the named snapshot.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs release\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR
+.ad
+.sp .6
+.RS 4n
+Removes a single reference, named with the \fItag\fR argument, from the specified snapshot or snapshots. The tag must already exist for each snapshot.
+.sp
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively releases a hold with the given tag on the snapshots of all descendent file systems.
+.RE
+
+.RE
+
+\fB\fBzfs jail\fR \fIjailid\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Attaches the given file system to the given jail. From now on this file system tree can be managed from within a jail if the "\fBjailed\fR" property has been set.
+To use this functionality, sysctl \fBsecurity.jail.enforce_statfs\fR should be set to 0 and sysctl \fBsecurity.jail.mount_allowed\fR should be set to 1.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unjail\fR \fIjailid\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Detaches the given file system from the given jail.
+.RE
+
.SH EXAMPLES
.LP
\fBExample 1 \fRCreating a ZFS File System Hierarchy
.sp
.LP
-The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and automatically inherited by the child file system.
+The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and is automatically inherited by the child file system.
.sp
.in +2
@@ -2431,7 +2591,7 @@ The following command creates a snapshot named \fByesterday\fR. This snapshot is
.sp
.LP
-\fBExample 3 \fRTaking and Destroying Multiple Snapshots
+\fBExample 3 \fRCreating and Destroying Multiple Snapshots
.sp
.LP
The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR and all of its descendent file systems. Each snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of its file system. The second command destroys the newly created snapshots.
@@ -2446,10 +2606,10 @@ The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR
.sp
.LP
-\fBExample 4 \fRTurning Off Compression
+\fBExample 4 \fRDisabling and Enabling File System Compression
.sp
.LP
-The following commands turn compression off for all file systems under \fBpool/home\fR, but explicitly turns it on for \fBpool/home/anne\fR.
+The following command disables the \fBcompression\fR property for all file systems under \fBpool/home\fR. The next command explicitly enables \fBcompression\fR for \fBpool/home/anne\fR.
.sp
.in +2
@@ -2464,14 +2624,12 @@ The following commands turn compression off for all file systems under \fBpool/h
\fBExample 5 \fRListing ZFS Datasets
.sp
.LP
-The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR) . See \fBzpool\fR(1M) for more information on pool properties.
+The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR. The default is \fBoff\fR. See \fBzpool\fR(1M) for more information on pool properties.
.sp
.in +2
.nf
# \fBzfs list\fR
-
-
NAME USED AVAIL REFER MOUNTPOINT
pool 450K 457G 18K /pool
pool/home 315K 457G 21K /export/home
@@ -2505,25 +2663,21 @@ The following command lists all properties for \fBpool/home/bob\fR.
.in +2
.nf
# \fBzfs get all pool/home/bob\fR
-
-
NAME PROPERTY VALUE SOURCE
pool/home/bob type filesystem -
-pool/home/bob creation Thu Jul 12 14:44 2007 -
-pool/home/bob used 276K -
-pool/home/bob available 50.0G -
-pool/home/bob referenced 276K -
+pool/home/bob creation Tue Jul 21 15:53 2009 -
+pool/home/bob used 21K -
+pool/home/bob available 20.0G -
+pool/home/bob referenced 21K -
pool/home/bob compressratio 1.00x -
pool/home/bob mounted yes -
-pool/home/bob quota 50G local
+pool/home/bob quota 20G local
pool/home/bob reservation none default
pool/home/bob recordsize 128K default
-pool/home/bob mountpoint /export/home/bob inherited
- from
- pool/home
+pool/home/bob mountpoint /pool/home/bob default
pool/home/bob sharenfs off default
pool/home/bob checksum on default
-pool/home/bob compression off default
+pool/home/bob compression on local
pool/home/bob atime on default
pool/home/bob devices on default
pool/home/bob exec on default
@@ -2537,22 +2691,21 @@ pool/home/bob canmount on default
pool/home/bob shareiscsi off default
pool/home/bob xattr on default
pool/home/bob copies 1 default
-pool/home/bob version 1 -
+pool/home/bob version 4 -
pool/home/bob utf8only off -
pool/home/bob normalization none -
pool/home/bob casesensitivity sensitive -
pool/home/bob vscan off default
pool/home/bob nbmand off default
pool/home/bob sharesmb off default
-pool/home/bob refquota 10M local
+pool/home/bob refquota none default
pool/home/bob refreservation none default
pool/home/bob primarycache all default
-pool/home/bob secondarycache a default
+pool/home/bob secondarycache all default
pool/home/bob usedbysnapshots 0 -
-pool/home/bob usedbydataset 18K -
+pool/home/bob usedbydataset 21K -
pool/home/bob usedbychildren 0 -
pool/home/bob usedbyrefreservation 0 -
-
.fi
.in -2
.sp
@@ -2578,10 +2731,9 @@ The following command lists all properties with local settings for \fBpool/home/
.in +2
.nf
# \fBzfs get -r -s local -o name,property,value all pool/home/bob\fR
-
- NAME PROPERTY VALUE
- pool compression on
- pool/home checksum off
+NAME PROPERTY VALUE
+pool/home/bob quota 20G
+pool/home/bob compression on
.fi
.in -2
.sp
@@ -2669,7 +2821,7 @@ The following commands send a full stream and then an incremental stream to a re
.sp
.LP
-\fBExample 13 \fRUsing the \fBreceive\fR \fB-d\fR Option
+\fBExample 13 \fRUsing the \fBzfs receive\fR \fB-d\fR Option
.sp
.LP
The following command sends a full stream of \fBpoolA/fsA/fsB@snap\fR to a remote machine, receiving it into \fBpoolB/received/fsA/fsB@snap\fR. The \fBfsA/fsB@snap\fR portion of the received snapshot's name is determined from the name of the sent snapshot. \fBpoolB\fR must contain the file system \fBpoolB/received\fR. If \fBpoolB/received/fsA\fR does not exist, it is created as an empty file system.
@@ -2752,7 +2904,6 @@ The following commands show how to set \fBsharenfs\fR property options to enable
.in +2
.nf
# \fB# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR
-
.fi
.in -2
.sp
@@ -2770,13 +2921,12 @@ The following example shows how to set permissions so that user \fBcindys\fR can
.sp
.in +2
.nf
-# \fB# zfs allow cindys create,destroy,mount,snapshot tank/cindys\fR
+# \fBzfs allow cindys create,destroy,mount,snapshot tank/cindys\fR
# \fBzfs allow tank/cindys\fR
-------------------------------------------------------------
Local+Descendent permissions on (tank/cindys)
user cindys create,destroy,mount,snapshot
-------------------------------------------------------------
-
.fi
.in -2
.sp
@@ -2853,8 +3003,8 @@ The following example shows to grant the ability to set quotas and reservations
Local+Descendent permissions on (users/home)
user cindys quota,reservation
-------------------------------------------------------------
-cindys% zfs set quota=10G users/home/marks
-cindys% zfs get quota users/home/marks
+cindys% \fBzfs set quota=10G users/home/marks\fR
+cindys% \fBzfs get quota users/home/marks\fR
NAME PROPERTY VALUE SOURCE
users/home/marks quota 10G local
.fi
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
index ca5c2b232786..e2ab90eaf14f 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <libintl.h>
@@ -107,7 +106,8 @@ zfs_callback(zfs_handle_t *zhp, void *data)
zfs_prune_proplist(zhp,
cb->cb_props_table);
- if (zfs_expand_proplist(zhp, cb->cb_proplist)
+ if (zfs_expand_proplist(zhp, cb->cb_proplist,
+ (cb->cb_flags & ZFS_ITER_RECVD_PROPS))
!= 0) {
free(node);
return (-1);
@@ -350,11 +350,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t),
offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT);
- if (avl_pool == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
+ if (avl_pool == NULL)
+ nomem();
cb.cb_sortcol = sortcol;
cb.cb_flags = flags;
@@ -362,7 +359,7 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
cb.cb_types = types;
cb.cb_depth_limit = limit;
/*
- * If cb_proplist is provided then in the zfs_handles created we
+ * If cb_proplist is provided then in the zfs_handles created we
* retain only those properties listed in cb_proplist and sortcol.
* The rest are pruned. So, the caller should make sure that no other
* properties other than those listed in cb_proplist/sortcol are
@@ -399,11 +396,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
sizeof (cb.cb_props_table));
}
- if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
+ if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL)
+ nomem();
if (argc == 0) {
/*
@@ -453,11 +447,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
/*
* Finally, clean up the AVL tree.
*/
- if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory"));
- exit(1);
- }
+ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL)
+ nomem();
while ((node = uu_avl_walk_next(walk)) != NULL) {
uu_avl_remove(cb.cb_avl, node);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
index a0290775b6b6..8c6b9fdef54f 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
@@ -42,6 +42,7 @@ typedef struct zfs_sort_column {
#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
#define ZFS_ITER_PROP_LISTSNAPS (1 << 2)
#define ZFS_ITER_DEPTH_LIMIT (1 << 3)
+#define ZFS_ITER_RECVD_PROPS (1 << 4)
int zfs_for_each(int, char **, int options, zfs_type_t,
zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
index c5e9b64a1f4b..8383dbc2dde7 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
@@ -20,8 +20,8 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#include <assert.h>
@@ -41,23 +41,33 @@
#include <zone.h>
#include <grp.h>
#include <pwd.h>
+#include <signal.h>
+#include <sys/list.h>
#include <sys/mntent.h>
#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/fs/zfs.h>
+#include <sys/types.h>
+#include <time.h>
#include <libzfs.h>
+#include <zfs_prop.h>
+#include <zfs_deleg.h>
#include <libuutil.h>
+#ifdef sun
+#include <aclutils.h>
+#include <directory.h>
+#endif
#include "zfs_iter.h"
#include "zfs_util.h"
+#include "zfs_comutil.h"
libzfs_handle_t *g_zfs;
static FILE *mnttab_file;
static char history_str[HIS_MAX_RECORD_LEN];
-const char *pypath = "/usr/lib/zfs/pyzfs.py";
static int zfs_do_clone(int argc, char **argv);
static int zfs_do_create(int argc, char **argv);
@@ -78,7 +88,12 @@ static int zfs_do_send(int argc, char **argv);
static int zfs_do_receive(int argc, char **argv);
static int zfs_do_promote(int argc, char **argv);
static int zfs_do_userspace(int argc, char **argv);
-static int zfs_do_python(int argc, char **argv);
+static int zfs_do_allow(int argc, char **argv);
+static int zfs_do_unallow(int argc, char **argv);
+static int zfs_do_hold(int argc, char **argv);
+static int zfs_do_holds(int argc, char **argv);
+static int zfs_do_release(int argc, char **argv);
+static int zfs_do_diff(int argc, char **argv);
static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
@@ -124,7 +139,11 @@ typedef enum {
HELP_ALLOW,
HELP_UNALLOW,
HELP_USERSPACE,
- HELP_GROUPSPACE
+ HELP_GROUPSPACE,
+ HELP_HOLD,
+ HELP_HOLDS,
+ HELP_RELEASE,
+ HELP_DIFF
} zfs_help_t;
typedef struct zfs_command {
@@ -155,7 +174,7 @@ static zfs_command_t command_table[] = {
{ "list", zfs_do_list, HELP_LIST },
{ NULL },
{ "set", zfs_do_set, HELP_SET },
- { "get", zfs_do_get, HELP_GET },
+ { "get", zfs_do_get, HELP_GET },
{ "inherit", zfs_do_inherit, HELP_INHERIT },
{ "upgrade", zfs_do_upgrade, HELP_UPGRADE },
{ "userspace", zfs_do_userspace, HELP_USERSPACE },
@@ -169,9 +188,14 @@ static zfs_command_t command_table[] = {
{ "send", zfs_do_send, HELP_SEND },
{ "receive", zfs_do_receive, HELP_RECEIVE },
{ NULL },
- { "allow", zfs_do_python, HELP_ALLOW },
+ { "allow", zfs_do_allow, HELP_ALLOW },
+ { NULL },
+ { "unallow", zfs_do_unallow, HELP_UNALLOW },
{ NULL },
- { "unallow", zfs_do_python, HELP_UNALLOW },
+ { "hold", zfs_do_hold, HELP_HOLD },
+ { "holds", zfs_do_holds, HELP_HOLDS },
+ { "release", zfs_do_release, HELP_RELEASE },
+ { "diff", zfs_do_diff, HELP_DIFF },
{ NULL },
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
@@ -194,15 +218,15 @@ get_usage(zfs_help_t idx)
"\tcreate [-ps] [-b blocksize] [-o property=value] ... "
"-V <size> <volume>\n"));
case HELP_DESTROY:
- return (gettext("\tdestroy [-rRf] "
- "<filesystem|volume|snapshot>\n"));
+ return (gettext("\tdestroy [-rRf] <filesystem|volume>\n"
+ "\tdestroy [-rRd] <snapshot>\n"));
case HELP_GET:
return (gettext("\tget [-rHp] [-d max] "
- "[-o field[,...]] [-s source[,...]]\n"
+ "[-o \"all\" | field[,...]] [-s source[,...]]\n"
"\t <\"all\" | property[,...]> "
"[filesystem|volume|snapshot] ...\n"));
case HELP_INHERIT:
- return (gettext("\tinherit [-r] <property> "
+ return (gettext("\tinherit [-rS] <property> "
"<filesystem|volume|snapshot> ...\n"));
case HELP_UPGRADE:
return (gettext("\tupgrade [-v]\n"
@@ -222,9 +246,9 @@ get_usage(zfs_help_t idx)
case HELP_PROMOTE:
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
- return (gettext("\treceive [-vnF] <filesystem|volume|"
+ return (gettext("\treceive [-vnFu] <filesystem|volume|"
"snapshot>\n"
- "\treceive [-vnF] -d <filesystem>\n"));
+ "\treceive [-vnFu] [-d | -e] <filesystem>\n"));
case HELP_RENAME:
return (gettext("\trename <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
@@ -233,7 +257,7 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
- return (gettext("\tsend [-R] [-[iI] snapshot] <snapshot>\n"));
+ return (gettext("\tsend [-RDp] [-[iI] snapshot] <snapshot>\n"));
case HELP_SET:
return (gettext("\tset <property=value> "
"<filesystem|volume|snapshot> ...\n"));
@@ -246,10 +270,11 @@ get_usage(zfs_help_t idx)
return (gettext("\tunmount [-f] "
"<-a | filesystem|mountpoint>\n"));
case HELP_UNSHARE:
- return (gettext("\tunshare [-f] "
+ return (gettext("\tunshare "
"<-a | filesystem|mountpoint>\n"));
case HELP_ALLOW:
- return (gettext("\tallow [-ldug] "
+ return (gettext("\tallow <filesystem|volume>\n"
+ "\tallow [-ldug] "
"<\"everyone\"|user|group>[,...] <perm|@setname>[,...]\n"
"\t <filesystem|volume>\n"
"\tallow [-ld] -e <perm|@setname>[,...] "
@@ -275,28 +300,54 @@ get_usage(zfs_help_t idx)
return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
"[-sS field] ... [-t type[,...]]\n"
"\t <filesystem|snapshot>\n"));
+ case HELP_HOLD:
+ return (gettext("\thold [-r] <tag> <snapshot> ...\n"));
+ case HELP_HOLDS:
+ return (gettext("\tholds [-r] <snapshot> ...\n"));
+ case HELP_RELEASE:
+ return (gettext("\trelease [-r] <tag> <snapshot> ...\n"));
+ case HELP_DIFF:
+ return (gettext("\tdiff [-FHt] <snapshot> "
+ "[snapshot|filesystem]\n"));
}
abort();
/* NOTREACHED */
}
+void
+nomem(void)
+{
+ (void) fprintf(stderr, gettext("internal error: out of memory\n"));
+ exit(1);
+}
+
/*
* Utility function to guarantee malloc() success.
*/
+
void *
safe_malloc(size_t size)
{
void *data;
- if ((data = calloc(1, size)) == NULL) {
- (void) fprintf(stderr, "internal error: out of memory\n");
- exit(1);
- }
+ if ((data = calloc(1, size)) == NULL)
+ nomem();
return (data);
}
+static char *
+safe_strdup(char *str)
+{
+ char *dupstr = strdup(str);
+
+ if (dupstr == NULL)
+ nomem();
+
+ return (dupstr);
+}
+
/*
* Callback routine that will print out information for each of
* the properties.
@@ -435,11 +486,8 @@ parseprop(nvlist_t *props)
"specified multiple times\n"), propname);
return (-1);
}
- if (nvlist_add_string(props, propname, propval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- return (-1);
- }
+ if (nvlist_add_string(props, propname, propval) != 0)
+ nomem();
return (0);
}
@@ -464,6 +512,59 @@ parse_depth(char *opt, int *flags)
return (depth);
}
+#define PROGRESS_DELAY 2 /* seconds */
+
+static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+static time_t pt_begin;
+static char *pt_header = NULL;
+static boolean_t pt_shown;
+
+static void
+start_progress_timer(void)
+{
+ pt_begin = time(NULL) + PROGRESS_DELAY;
+ pt_shown = B_FALSE;
+}
+
+static void
+set_progress_header(char *header)
+{
+ assert(pt_header == NULL);
+ pt_header = safe_strdup(header);
+ if (pt_shown) {
+ (void) printf("%s: ", header);
+ (void) fflush(stdout);
+ }
+}
+
+static void
+update_progress(char *update)
+{
+ if (!pt_shown && time(NULL) > pt_begin) {
+ int len = strlen(update);
+
+ (void) printf("%s: %s%*.*s", pt_header, update, len, len,
+ pt_reverse);
+ (void) fflush(stdout);
+ pt_shown = B_TRUE;
+ } else if (pt_shown) {
+ int len = strlen(update);
+
+ (void) printf("%s%*.*s", update, len, len, pt_reverse);
+ (void) fflush(stdout);
+ }
+}
+
+static void
+finish_progress(char *done)
+{
+ if (pt_shown) {
+ (void) printf("%s\n", done);
+ (void) fflush(stdout);
+ }
+ free(pt_header);
+ pt_header = NULL;
+}
/*
* zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
*
@@ -483,11 +584,8 @@ zfs_do_clone(int argc, char **argv)
int ret;
int c;
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
while ((c = getopt(argc, argv, "o:p")) != -1) {
@@ -552,8 +650,9 @@ zfs_do_clone(int argc, char **argv)
clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET);
if (clone != NULL) {
- if ((ret = zfs_mount(clone, NULL, 0)) == 0)
- ret = zfs_share(clone);
+ if (zfs_get_type(clone) != ZFS_TYPE_VOLUME)
+ if ((ret = zfs_mount(clone, NULL, 0)) == 0)
+ ret = zfs_share(clone);
zfs_close(clone);
}
}
@@ -599,13 +698,10 @@ zfs_do_create(int argc, char **argv)
int ret = 1;
nvlist_t *props;
uint64_t intval;
- int canmount;
+ int canmount = ZFS_CANMOUNT_OFF;
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) {
@@ -620,12 +716,8 @@ zfs_do_create(int argc, char **argv)
}
if (nvlist_add_uint64(props,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE),
- intval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0)
+ nomem();
volsize = intval;
break;
case 'p':
@@ -642,11 +734,8 @@ zfs_do_create(int argc, char **argv)
if (nvlist_add_uint64(props,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
- intval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
+ intval) != 0)
+ nomem();
break;
case 'o':
if (parseprop(props))
@@ -708,15 +797,14 @@ zfs_do_create(int argc, char **argv)
resv_prop = ZFS_PROP_REFRESERVATION;
else
resv_prop = ZFS_PROP_RESERVATION;
+ volsize = zvol_volsize_to_reservation(volsize, props);
if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop),
&strval) != 0) {
if (nvlist_add_uint64(props,
zfs_prop_to_name(resv_prop), volsize) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
nvlist_free(props);
- return (1);
+ nomem();
}
}
}
@@ -741,19 +829,20 @@ zfs_do_create(int argc, char **argv)
if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
goto error;
+
+ ret = 0;
/*
* if the user doesn't want the dataset automatically mounted,
* then skip the mount/share step
*/
-
- canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
+ if (zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, type))
+ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
/*
* Mount and/or share the new filesystem as appropriate. We provide a
* verbose error message to let the user know that their filesystem was
* in fact created, even if we failed to mount or share it.
*/
- ret = 0;
if (canmount == ZFS_CANMOUNT_ON) {
if (zfs_mount(zhp, NULL, 0) != 0) {
(void) fprintf(stderr, gettext("filesystem "
@@ -778,11 +867,13 @@ badusage:
}
/*
- * zfs destroy [-rf] <fs, snap, vol>
+ * zfs destroy [-rRf] <fs, vol>
+ * zfs destroy [-rRd] <snap>
*
- * -r Recursively destroy all children
- * -R Recursively destroy all dependents, including clones
- * -f Force unmounting of any dependents
+ * -r Recursively destroy all children
+ * -R Recursively destroy all dependents, including clones
+ * -f Force unmounting of any dependents
+ * -d If we can't destroy now, mark for deferred destruction
*
* Destroys the given dataset. By default, it will unmount any filesystems,
* and refuse to destroy a dataset that has any dependents. A dependent can
@@ -798,6 +889,7 @@ typedef struct destroy_cbdata {
boolean_t cb_closezhp;
zfs_handle_t *cb_target;
char *cb_snapname;
+ boolean_t cb_defer_destroy;
} destroy_cbdata_t;
/*
@@ -866,7 +958,7 @@ destroy_callback(zfs_handle_t *zhp, void *data)
/*
* Ignore pools (which we've already flagged as an error before getting
- * here.
+ * here).
*/
if (strchr(zfs_get_name(zhp), '/') == NULL &&
zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
@@ -878,7 +970,7 @@ destroy_callback(zfs_handle_t *zhp, void *data)
* Bail out on the first error.
*/
if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 ||
- zfs_destroy(zhp) != 0) {
+ zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) {
zfs_close(zhp);
return (-1);
}
@@ -930,10 +1022,15 @@ zfs_do_destroy(int argc, char **argv)
int c;
zfs_handle_t *zhp;
char *cp;
+ zfs_type_t type = ZFS_TYPE_DATASET;
/* check options */
- while ((c = getopt(argc, argv, "frR")) != -1) {
+ while ((c = getopt(argc, argv, "dfrR")) != -1) {
switch (c) {
+ case 'd':
+ cb.cb_defer_destroy = B_TRUE;
+ type = ZFS_TYPE_SNAPSHOT;
+ break;
case 'f':
cb.cb_force = 1;
break;
@@ -979,14 +1076,22 @@ zfs_do_destroy(int argc, char **argv)
cp++;
if (cb.cb_doclones) {
+ boolean_t defer = cb.cb_defer_destroy;
+
+ /*
+ * Temporarily ignore the defer_destroy setting since
+ * it's not supported for clones.
+ */
+ cb.cb_defer_destroy = B_FALSE;
cb.cb_snapname = cp;
if (destroy_snap_clones(zhp, &cb) != 0) {
zfs_close(zhp);
return (1);
}
+ cb.cb_defer_destroy = defer;
}
- ret = zfs_destroy_snaps(zhp, cp);
+ ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy);
zfs_close(zhp);
if (ret) {
(void) fprintf(stderr,
@@ -995,9 +1100,8 @@ zfs_do_destroy(int argc, char **argv)
return (ret != 0);
}
-
/* Open the given dataset */
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL)
return (1);
cb.cb_target = zhp;
@@ -1023,15 +1127,15 @@ zfs_do_destroy(int argc, char **argv)
* Check for any dependents and/or clones.
*/
cb.cb_first = B_TRUE;
- if (!cb.cb_doclones &&
+ if (!cb.cb_doclones && !cb.cb_defer_destroy &&
zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
&cb) != 0) {
zfs_close(zhp);
return (1);
}
- if (cb.cb_error ||
- zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
+ if (cb.cb_error || (!cb.cb_defer_destroy &&
+ (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) {
zfs_close(zhp);
return (1);
}
@@ -1044,22 +1148,35 @@ zfs_do_destroy(int argc, char **argv)
if (destroy_callback(zhp, &cb) != 0)
return (1);
-
return (0);
}
+static boolean_t
+is_recvd_column(zprop_get_cbdata_t *cbp)
+{
+ int i;
+ zfs_get_column_t col;
+
+ for (i = 0; i < ZFS_GET_NCOLS &&
+ (col = cbp->cb_columns[i]) != GET_COL_NONE; i++)
+ if (col == GET_COL_RECVD)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
/*
- * zfs get [-rHp] [-o field[,field]...] [-s source[,source]...]
- * < all | property[,property]... > < fs | snap | vol > ...
+ * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...]
+ * < all | property[,property]... > < fs | snap | vol > ...
*
* -r recurse over any child datasets
* -H scripted mode. Headers are stripped, and fields are separated
* by tabs instead of spaces.
- * -o Set of fields to display. One of "name,property,value,source".
- * Default is all four.
+ * -o Set of fields to display. One of "name,property,value,
+ * received,source". Default is "name,property,value,source".
+ * "all" is an alias for all five.
* -s Set of sources to allow. One of
- * "local,default,inherited,temporary,none". Default is all
- * five.
+ * "local,default,inherited,received,temporary,none". Default is
+ * all six.
* -p Display values in parsable (literal) format.
*
* Prints properties for the given datasets. The user can control which
@@ -1073,16 +1190,19 @@ static int
get_callback(zfs_handle_t *zhp, void *data)
{
char buf[ZFS_MAXPROPLEN];
+ char rbuf[ZFS_MAXPROPLEN];
zprop_source_t sourcetype;
char source[ZFS_MAXNAMELEN];
zprop_get_cbdata_t *cbp = data;
- nvlist_t *userprop = zfs_get_user_props(zhp);
+ nvlist_t *user_props = zfs_get_user_props(zhp);
zprop_list_t *pl = cbp->cb_proplist;
nvlist_t *propval;
char *strval;
char *sourceval;
+ boolean_t received = is_recvd_column(cbp);
for (; pl != NULL; pl = pl->pl_next) {
+ char *recvdval = NULL;
/*
* Skip the special fake placeholder. This will also skip over
* the name property when 'all' is specified.
@@ -1109,9 +1229,14 @@ get_callback(zfs_handle_t *zhp, void *data)
(void) strlcpy(buf, "-", sizeof (buf));
}
+ if (received && (zfs_prop_get_recvd(zhp,
+ zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf),
+ cbp->cb_literal) == 0))
+ recvdval = rbuf;
+
zprop_print_one_property(zfs_get_name(zhp), cbp,
zfs_prop_to_name(pl->pl_prop),
- buf, sourcetype, source);
+ buf, sourcetype, source, recvdval);
} else if (zfs_prop_userquota(pl->pl_user_prop)) {
sourcetype = ZPROP_SRC_LOCAL;
@@ -1122,9 +1247,9 @@ get_callback(zfs_handle_t *zhp, void *data)
}
zprop_print_one_property(zfs_get_name(zhp), cbp,
- pl->pl_user_prop, buf, sourcetype, source);
+ pl->pl_user_prop, buf, sourcetype, source, NULL);
} else {
- if (nvlist_lookup_nvlist(userprop,
+ if (nvlist_lookup_nvlist(user_props,
pl->pl_user_prop, &propval) != 0) {
if (pl->pl_all)
continue;
@@ -1139,6 +1264,9 @@ get_callback(zfs_handle_t *zhp, void *data)
if (strcmp(sourceval,
zfs_get_name(zhp)) == 0) {
sourcetype = ZPROP_SRC_LOCAL;
+ } else if (strcmp(sourceval,
+ ZPROP_SOURCE_VAL_RECVD) == 0) {
+ sourcetype = ZPROP_SRC_RECEIVED;
} else {
sourcetype = ZPROP_SRC_INHERITED;
(void) strlcpy(source,
@@ -1146,9 +1274,14 @@ get_callback(zfs_handle_t *zhp, void *data)
}
}
+ if (received && (zfs_prop_get_recvd(zhp,
+ pl->pl_user_prop, rbuf, sizeof (rbuf),
+ cbp->cb_literal) == 0))
+ recvdval = rbuf;
+
zprop_print_one_property(zfs_get_name(zhp), cbp,
pl->pl_user_prop, strval, sourcetype,
- source);
+ source, recvdval);
}
}
@@ -1204,10 +1337,10 @@ zfs_do_get(int argc, char **argv)
i = 0;
while (*optarg != '\0') {
static char *col_subopts[] =
- { "name", "property", "value", "source",
- NULL };
+ { "name", "property", "value", "received",
+ "source", "all", NULL };
- if (i == 4) {
+ if (i == ZFS_GET_NCOLS) {
(void) fprintf(stderr, gettext("too "
"many fields given to -o "
"option\n"));
@@ -1226,8 +1359,28 @@ zfs_do_get(int argc, char **argv)
cb.cb_columns[i++] = GET_COL_VALUE;
break;
case 3:
+ cb.cb_columns[i++] = GET_COL_RECVD;
+ flags |= ZFS_ITER_RECVD_PROPS;
+ break;
+ case 4:
cb.cb_columns[i++] = GET_COL_SOURCE;
break;
+ case 5:
+ if (i > 0) {
+ (void) fprintf(stderr,
+ gettext("\"all\" conflicts "
+ "with specific fields "
+ "given to -o option\n"));
+ usage(B_FALSE);
+ }
+ cb.cb_columns[0] = GET_COL_NAME;
+ cb.cb_columns[1] = GET_COL_PROPERTY;
+ cb.cb_columns[2] = GET_COL_VALUE;
+ cb.cb_columns[3] = GET_COL_RECVD;
+ cb.cb_columns[4] = GET_COL_SOURCE;
+ flags |= ZFS_ITER_RECVD_PROPS;
+ i = ZFS_GET_NCOLS;
+ break;
default:
(void) fprintf(stderr,
gettext("invalid column name "
@@ -1242,7 +1395,8 @@ zfs_do_get(int argc, char **argv)
while (*optarg != '\0') {
static char *source_subopts[] = {
"local", "default", "inherited",
- "temporary", "none", NULL };
+ "received", "temporary", "none",
+ NULL };
switch (getsubopt(&optarg, source_subopts,
&value)) {
@@ -1256,9 +1410,12 @@ zfs_do_get(int argc, char **argv)
cb.cb_sources |= ZPROP_SRC_INHERITED;
break;
case 3:
- cb.cb_sources |= ZPROP_SRC_TEMPORARY;
+ cb.cb_sources |= ZPROP_SRC_RECEIVED;
break;
case 4:
+ cb.cb_sources |= ZPROP_SRC_TEMPORARY;
+ break;
+ case 5:
cb.cb_sources |= ZPROP_SRC_NONE;
break;
default:
@@ -1325,9 +1482,10 @@ zfs_do_get(int argc, char **argv)
}
/*
- * inherit [-r] <property> <fs|vol> ...
+ * inherit [-rS] <property> <fs|vol> ...
*
- * -r Recurse over all children
+ * -r Recurse over all children
+ * -S Revert to received value, if any
*
* For each dataset specified on the command line, inherit the given property
* from its parent. Inheriting a property at the pool level will cause it to
@@ -1336,11 +1494,16 @@ zfs_do_get(int argc, char **argv)
* local modifications for each dataset.
*/
+typedef struct inherit_cbdata {
+ const char *cb_propname;
+ boolean_t cb_received;
+} inherit_cbdata_t;
+
static int
inherit_recurse_cb(zfs_handle_t *zhp, void *data)
{
- char *propname = data;
- zfs_prop_t prop = zfs_name_to_prop(propname);
+ inherit_cbdata_t *cb = data;
+ zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname);
/*
* If we're doing it recursively, then ignore properties that
@@ -1350,15 +1513,15 @@ inherit_recurse_cb(zfs_handle_t *zhp, void *data)
!zfs_prop_valid_for_type(prop, zfs_get_type(zhp)))
return (0);
- return (zfs_prop_inherit(zhp, propname) != 0);
+ return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0);
}
static int
inherit_cb(zfs_handle_t *zhp, void *data)
{
- char *propname = data;
+ inherit_cbdata_t *cb = data;
- return (zfs_prop_inherit(zhp, propname) != 0);
+ return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0);
}
static int
@@ -1366,16 +1529,21 @@ zfs_do_inherit(int argc, char **argv)
{
int c;
zfs_prop_t prop;
+ inherit_cbdata_t cb = { 0 };
char *propname;
int ret;
int flags = 0;
+ boolean_t received = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, "r")) != -1) {
+ while ((c = getopt(argc, argv, "rS")) != -1) {
switch (c) {
case 'r':
flags |= ZFS_ITER_RECURSE;
break;
+ case 'S':
+ received = B_TRUE;
+ break;
case '?':
default:
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -1408,7 +1576,7 @@ zfs_do_inherit(int argc, char **argv)
propname);
return (1);
}
- if (!zfs_prop_inheritable(prop)) {
+ if (!zfs_prop_inheritable(prop) && !received) {
(void) fprintf(stderr, gettext("'%s' property cannot "
"be inherited\n"), propname);
if (prop == ZFS_PROP_QUOTA ||
@@ -1419,18 +1587,27 @@ zfs_do_inherit(int argc, char **argv)
"%s=none' to clear\n"), propname);
return (1);
}
+ if (received && (prop == ZFS_PROP_VOLSIZE ||
+ prop == ZFS_PROP_VERSION)) {
+ (void) fprintf(stderr, gettext("'%s' property cannot "
+ "be reverted to a received value\n"), propname);
+ return (1);
+ }
} else if (!zfs_prop_user(propname)) {
(void) fprintf(stderr, gettext("invalid property '%s'\n"),
propname);
usage(B_FALSE);
}
+ cb.cb_propname = propname;
+ cb.cb_received = received;
+
if (flags & ZFS_ITER_RECURSE) {
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
- NULL, NULL, 0, inherit_recurse_cb, propname);
+ NULL, NULL, 0, inherit_recurse_cb, &cb);
} else {
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
- NULL, NULL, 0, inherit_cb, propname);
+ NULL, NULL, 0, inherit_cb, &cb);
}
return (ret);
@@ -1499,31 +1676,25 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)
{
upgrade_cbdata_t *cb = data;
int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
- int i;
- static struct { int zplver; int spaver; } table[] = {
- {ZPL_VERSION_FUID, SPA_VERSION_FUID},
- {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
- {0, 0}
- };
+ int needed_spa_version;
+ int spa_version;
+ if (zfs_spa_version(zhp, &spa_version) < 0)
+ return (-1);
- for (i = 0; table[i].zplver; i++) {
- if (cb->cb_version >= table[i].zplver) {
- int spa_version;
+ needed_spa_version = zfs_spa_version_map(cb->cb_version);
- if (zfs_spa_version(zhp, &spa_version) < 0)
- return (-1);
+ if (needed_spa_version < 0)
+ return (-1);
- if (spa_version < table[i].spaver) {
- /* can't upgrade */
- (void) printf(gettext("%s: can not be "
- "upgraded; the pool version needs to first "
- "be upgraded\nto version %d\n\n"),
- zfs_get_name(zhp), table[i].spaver);
- cb->cb_numfailed++;
- return (0);
- }
- }
+ if (spa_version < needed_spa_version) {
+ /* can't upgrade */
+ (void) printf(gettext("%s: can not be "
+ "upgraded; the pool version needs to first "
+ "be upgraded\nto version %d\n\n"),
+ zfs_get_name(zhp), needed_spa_version);
+ cb->cb_numfailed++;
+ return (0);
}
/* upgrade */
@@ -1622,14 +1793,13 @@ zfs_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 1 Initial ZFS filesystem version\n"));
(void) printf(gettext(" 2 Enhanced directory entries\n"));
(void) printf(gettext(" 3 Case insensitive and File system "
- "unique identifer (FUID)\n"));
+ "unique identifier (FUID)\n"));
(void) printf(gettext(" 4 userquota, groupquota "
"properties\n"));
+ (void) printf(gettext(" 5 System attributes\n"));
(void) printf(gettext("\nFor more information on a particular "
- "version, including supported releases, see:\n\n"));
- (void) printf("http://www.opensolaris.org/os/community/zfs/"
- "version/zpl/N\n\n");
- (void) printf(gettext("Where 'N' is the version number.\n"));
+ "version, including supported releases,\n"));
+ (void) printf("see the ZFS Administration Guide.\n\n");
ret = 0;
} else if (argc || all) {
/* Upgrade filesystems */
@@ -1672,82 +1842,730 @@ zfs_do_upgrade(int argc, char **argv)
return (ret);
}
+#define USTYPE_USR_BIT (0)
+#define USTYPE_GRP_BIT (1)
+#define USTYPE_PSX_BIT (2)
+#define USTYPE_SMB_BIT (3)
+
+#define USTYPE_USR (1 << USTYPE_USR_BIT)
+#define USTYPE_GRP (1 << USTYPE_GRP_BIT)
+
+#define USTYPE_PSX (1 << USTYPE_PSX_BIT)
+#define USTYPE_SMB (1 << USTYPE_SMB_BIT)
+
+#define USTYPE_PSX_USR (USTYPE_PSX | USTYPE_USR)
+#define USTYPE_SMB_USR (USTYPE_SMB | USTYPE_USR)
+#define USTYPE_PSX_GRP (USTYPE_PSX | USTYPE_GRP)
+#define USTYPE_SMB_GRP (USTYPE_SMB | USTYPE_GRP)
+#define USTYPE_ALL (USTYPE_PSX_USR | USTYPE_SMB_USR \
+ | USTYPE_PSX_GRP | USTYPE_SMB_GRP)
+
+
+#define USPROP_USED_BIT (0)
+#define USPROP_QUOTA_BIT (1)
+
+#define USPROP_USED (1 << USPROP_USED_BIT)
+#define USPROP_QUOTA (1 << USPROP_QUOTA_BIT)
+
+typedef struct us_node {
+ nvlist_t *usn_nvl;
+ uu_avl_node_t usn_avlnode;
+ uu_list_node_t usn_listnode;
+} us_node_t;
+
+typedef struct us_cbdata {
+ nvlist_t **cb_nvlp;
+ uu_avl_pool_t *cb_avl_pool;
+ uu_avl_t *cb_avl;
+ boolean_t cb_numname;
+ boolean_t cb_nicenum;
+ boolean_t cb_sid2posix;
+ zfs_userquota_prop_t cb_prop;
+ zfs_sort_column_t *cb_sortcol;
+ size_t cb_max_typelen;
+ size_t cb_max_namelen;
+ size_t cb_max_usedlen;
+ size_t cb_max_quotalen;
+} us_cbdata_t;
+
+typedef struct {
+ zfs_sort_column_t *si_sortcol;
+ boolean_t si_num_name;
+ boolean_t si_parsable;
+} us_sort_info_t;
+
+static int
+us_compare(const void *larg, const void *rarg, void *unused)
+{
+ const us_node_t *l = larg;
+ const us_node_t *r = rarg;
+ int rc = 0;
+ us_sort_info_t *si = (us_sort_info_t *)unused;
+ zfs_sort_column_t *sortcol = si->si_sortcol;
+ boolean_t num_name = si->si_num_name;
+ nvlist_t *lnvl = l->usn_nvl;
+ nvlist_t *rnvl = r->usn_nvl;
+
+ for (; sortcol != NULL; sortcol = sortcol->sc_next) {
+ char *lvstr = "";
+ char *rvstr = "";
+ uint32_t lv32 = 0;
+ uint32_t rv32 = 0;
+ uint64_t lv64 = 0;
+ uint64_t rv64 = 0;
+ zfs_prop_t prop = sortcol->sc_prop;
+ const char *propname = NULL;
+ boolean_t reverse = sortcol->sc_reverse;
+
+ switch (prop) {
+ case ZFS_PROP_TYPE:
+ propname = "type";
+ (void) nvlist_lookup_uint32(lnvl, propname, &lv32);
+ (void) nvlist_lookup_uint32(rnvl, propname, &rv32);
+ if (rv32 != lv32)
+ rc = (rv32 > lv32) ? 1 : -1;
+ break;
+ case ZFS_PROP_NAME:
+ propname = "name";
+ if (num_name) {
+ (void) nvlist_lookup_uint32(lnvl, propname,
+ &lv32);
+ (void) nvlist_lookup_uint32(rnvl, propname,
+ &rv32);
+ if (rv32 != lv32)
+ rc = (rv32 > lv32) ? 1 : -1;
+ } else {
+ (void) nvlist_lookup_string(lnvl, propname,
+ &lvstr);
+ (void) nvlist_lookup_string(rnvl, propname,
+ &rvstr);
+ rc = strcmp(lvstr, rvstr);
+ }
+ break;
+
+ case ZFS_PROP_USED:
+ case ZFS_PROP_QUOTA:
+ if (ZFS_PROP_USED == prop)
+ propname = "used";
+ else
+ propname = "quota";
+ (void) nvlist_lookup_uint64(lnvl, propname, &lv64);
+ (void) nvlist_lookup_uint64(rnvl, propname, &rv64);
+ if (rv64 != lv64)
+ rc = (rv64 > lv64) ? 1 : -1;
+ }
+
+ if (rc)
+ if (rc < 0)
+ return (reverse ? 1 : -1);
+ else
+ return (reverse ? -1 : 1);
+ }
+
+ return (rc);
+}
+
+static inline const char *
+us_type2str(unsigned field_type)
+{
+ switch (field_type) {
+ case USTYPE_PSX_USR:
+ return ("POSIX User");
+ case USTYPE_PSX_GRP:
+ return ("POSIX Group");
+ case USTYPE_SMB_USR:
+ return ("SMB User");
+ case USTYPE_SMB_GRP:
+ return ("SMB Group");
+ default:
+ return ("Undefined");
+ }
+}
+
/*
* zfs userspace
*/
static int
userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
{
- zfs_userquota_prop_t *typep = arg;
- zfs_userquota_prop_t p = *typep;
+ us_cbdata_t *cb = (us_cbdata_t *)arg;
+ zfs_userquota_prop_t prop = cb->cb_prop;
char *name = NULL;
- char *ug, *propname;
+ char *propname;
char namebuf[32];
char sizebuf[32];
+ us_node_t *node;
+ uu_avl_pool_t *avl_pool = cb->cb_avl_pool;
+ uu_avl_t *avl = cb->cb_avl;
+ uu_avl_index_t idx;
+ nvlist_t *props;
+ us_node_t *n;
+ zfs_sort_column_t *sortcol = cb->cb_sortcol;
+ unsigned type;
+ const char *typestr;
+ size_t namelen;
+ size_t typelen;
+ size_t sizelen;
+ us_sort_info_t sortinfo = { sortcol, cb->cb_numname };
if (domain == NULL || domain[0] == '\0') {
- if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) {
+ /* POSIX */
+ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) {
+ type = USTYPE_PSX_GRP;
struct group *g = getgrgid(rid);
if (g)
name = g->gr_name;
} else {
+ type = USTYPE_PSX_USR;
struct passwd *p = getpwuid(rid);
if (p)
name = p->pw_name;
}
+ } else {
+ char sid[ZFS_MAXNAMELEN+32];
+ uid_t id;
+ uint64_t classes;
+#ifdef sun
+ int err;
+ directory_error_t e;
+#endif
+
+ (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid);
+ /* SMB */
+ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) {
+ type = USTYPE_SMB_GRP;
+#ifdef sun
+ err = sid_to_id(sid, B_FALSE, &id);
+#endif
+ } else {
+ type = USTYPE_SMB_USR;
+#ifdef sun
+ err = sid_to_id(sid, B_TRUE, &id);
+#endif
+ }
+
+#ifdef sun
+ if (err == 0) {
+ rid = id;
+
+ e = directory_name_from_sid(NULL, sid, &name, &classes);
+ if (e != NULL) {
+ directory_error_free(e);
+ return (NULL);
+ }
+
+ if (name == NULL)
+ name = sid;
+ }
+#endif
}
- if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA)
- ug = "group";
- else
- ug = "user";
+/*
+ * if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA)
+ * ug = "group";
+ * else
+ * ug = "user";
+ */
- if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED)
+ if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED)
propname = "used";
else
propname = "quota";
- if (name == NULL) {
- (void) snprintf(namebuf, sizeof (namebuf),
- "%llu", (longlong_t)rid);
+ (void) snprintf(namebuf, sizeof (namebuf), "%u", rid);
+ if (name == NULL)
name = namebuf;
+
+ if (cb->cb_nicenum)
+ zfs_nicenum(space, sizebuf, sizeof (sizebuf));
+ else
+ (void) sprintf(sizebuf, "%llu", space);
+
+ node = safe_malloc(sizeof (us_node_t));
+ uu_avl_node_init(node, &node->usn_avlnode, avl_pool);
+
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
+ free(node);
+ return (-1);
}
- zfs_nicenum(space, sizebuf, sizeof (sizebuf));
- (void) printf("%s %s %s%c%s %s\n", propname, ug, domain,
- domain[0] ? '-' : ' ', name, sizebuf);
+ if (nvlist_add_uint32(props, "type", type) != 0)
+ nomem();
+
+ if (cb->cb_numname) {
+ if (nvlist_add_uint32(props, "name", rid) != 0)
+ nomem();
+ namelen = strlen(namebuf);
+ } else {
+ if (nvlist_add_string(props, "name", name) != 0)
+ nomem();
+ namelen = strlen(name);
+ }
+
+ typestr = us_type2str(type);
+ typelen = strlen(gettext(typestr));
+ if (typelen > cb->cb_max_typelen)
+ cb->cb_max_typelen = typelen;
+
+ if (namelen > cb->cb_max_namelen)
+ cb->cb_max_namelen = namelen;
+
+ sizelen = strlen(sizebuf);
+ if (0 == strcmp(propname, "used")) {
+ if (sizelen > cb->cb_max_usedlen)
+ cb->cb_max_usedlen = sizelen;
+ } else {
+ if (sizelen > cb->cb_max_quotalen)
+ cb->cb_max_quotalen = sizelen;
+ }
+
+ node->usn_nvl = props;
+
+ n = uu_avl_find(avl, node, &sortinfo, &idx);
+ if (n == NULL)
+ uu_avl_insert(avl, node, idx);
+ else {
+ nvlist_free(props);
+ free(node);
+ node = n;
+ props = node->usn_nvl;
+ }
+
+ if (nvlist_add_uint64(props, propname, space) != 0)
+ nomem();
return (0);
}
+static inline boolean_t
+usprop_check(zfs_userquota_prop_t p, unsigned types, unsigned props)
+{
+ unsigned type;
+ unsigned prop;
+
+ switch (p) {
+ case ZFS_PROP_USERUSED:
+ type = USTYPE_USR;
+ prop = USPROP_USED;
+ break;
+ case ZFS_PROP_USERQUOTA:
+ type = USTYPE_USR;
+ prop = USPROP_QUOTA;
+ break;
+ case ZFS_PROP_GROUPUSED:
+ type = USTYPE_GRP;
+ prop = USPROP_USED;
+ break;
+ case ZFS_PROP_GROUPQUOTA:
+ type = USTYPE_GRP;
+ prop = USPROP_QUOTA;
+ break;
+ default: /* ALL */
+ return (B_TRUE);
+ };
+
+ return (type & types && prop & props);
+}
+
+#define USFIELD_TYPE (1 << 0)
+#define USFIELD_NAME (1 << 1)
+#define USFIELD_USED (1 << 2)
+#define USFIELD_QUOTA (1 << 3)
+#define USFIELD_ALL (USFIELD_TYPE | USFIELD_NAME | USFIELD_USED | USFIELD_QUOTA)
+
+static int
+parsefields(unsigned *fieldsp, char **names, unsigned *bits, size_t len)
+{
+ char *field = optarg;
+ char *delim;
+
+ do {
+ int i;
+ boolean_t found = B_FALSE;
+ delim = strchr(field, ',');
+ if (delim != NULL)
+ *delim = '\0';
+
+ for (i = 0; i < len; i++)
+ if (0 == strcmp(field, names[i])) {
+ found = B_TRUE;
+ *fieldsp |= bits[i];
+ break;
+ }
+
+ if (!found) {
+ (void) fprintf(stderr, gettext("invalid type '%s'"
+ "for -t option\n"), field);
+ return (-1);
+ }
+
+ field = delim + 1;
+ } while (delim);
+
+ return (0);
+}
+
+
+static char *type_names[] = { "posixuser", "smbuser", "posixgroup", "smbgroup",
+ "all" };
+static unsigned type_bits[] = {
+ USTYPE_PSX_USR,
+ USTYPE_SMB_USR,
+ USTYPE_PSX_GRP,
+ USTYPE_SMB_GRP,
+ USTYPE_ALL
+};
+
+static char *us_field_names[] = { "type", "name", "used", "quota" };
+static unsigned us_field_bits[] = {
+ USFIELD_TYPE,
+ USFIELD_NAME,
+ USFIELD_USED,
+ USFIELD_QUOTA
+};
+
+static void
+print_us_node(boolean_t scripted, boolean_t parseable, unsigned fields,
+ size_t type_width, size_t name_width, size_t used_width,
+ size_t quota_width, us_node_t *node)
+{
+ nvlist_t *nvl = node->usn_nvl;
+ nvpair_t *nvp = NULL;
+ char valstr[ZFS_MAXNAMELEN];
+ boolean_t first = B_TRUE;
+ boolean_t quota_found = B_FALSE;
+
+ if (fields & USFIELD_QUOTA && !nvlist_exists(nvl, "quota"))
+ if (nvlist_add_string(nvl, "quota", "none") != 0)
+ nomem();
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ char *pname = nvpair_name(nvp);
+ data_type_t type = nvpair_type(nvp);
+ uint32_t val32 = 0;
+ uint64_t val64 = 0;
+ char *strval = NULL;
+ unsigned field = 0;
+ unsigned width = 0;
+ int i;
+ for (i = 0; i < 4; i++) {
+ if (0 == strcmp(pname, us_field_names[i])) {
+ field = us_field_bits[i];
+ break;
+ }
+ }
+
+ if (!(field & fields))
+ continue;
+
+ switch (type) {
+ case DATA_TYPE_UINT32:
+ (void) nvpair_value_uint32(nvp, &val32);
+ break;
+ case DATA_TYPE_UINT64:
+ (void) nvpair_value_uint64(nvp, &val64);
+ break;
+ case DATA_TYPE_STRING:
+ (void) nvpair_value_string(nvp, &strval);
+ break;
+ default:
+ (void) fprintf(stderr, "Invalid data type\n");
+ }
+
+ if (!first)
+ if (scripted)
+ (void) printf("\t");
+ else
+ (void) printf(" ");
+
+ switch (field) {
+ case USFIELD_TYPE:
+ strval = (char *)us_type2str(val32);
+ width = type_width;
+ break;
+ case USFIELD_NAME:
+ if (type == DATA_TYPE_UINT64) {
+ (void) sprintf(valstr, "%llu", val64);
+ strval = valstr;
+ }
+ width = name_width;
+ break;
+ case USFIELD_USED:
+ case USFIELD_QUOTA:
+ if (type == DATA_TYPE_UINT64) {
+ (void) nvpair_value_uint64(nvp, &val64);
+ if (parseable)
+ (void) sprintf(valstr, "%llu", val64);
+ else
+ zfs_nicenum(val64, valstr,
+ sizeof (valstr));
+ strval = valstr;
+ }
+
+ if (field == USFIELD_USED)
+ width = used_width;
+ else {
+ quota_found = B_FALSE;
+ width = quota_width;
+ }
+
+ break;
+ }
+
+ if (field == USFIELD_QUOTA && !quota_found)
+ (void) printf("%*s", width, strval);
+ else {
+ if (type == DATA_TYPE_STRING)
+ (void) printf("%-*s", width, strval);
+ else
+ (void) printf("%*s", width, strval);
+ }
+
+ first = B_FALSE;
+
+ }
+
+ (void) printf("\n");
+}
+
+static void
+print_us(boolean_t scripted, boolean_t parsable, unsigned fields,
+ unsigned type_width, unsigned name_width, unsigned used_width,
+ unsigned quota_width, boolean_t rmnode, uu_avl_t *avl)
+{
+ static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" };
+ us_node_t *node;
+ const char *col;
+ int i;
+ size_t width[4] = { type_width, name_width, used_width, quota_width };
+
+ if (!scripted) {
+ boolean_t first = B_TRUE;
+ for (i = 0; i < 4; i++) {
+ unsigned field = us_field_bits[i];
+ if (!(field & fields))
+ continue;
+
+ col = gettext(us_field_hdr[i]);
+ if (field == USFIELD_TYPE || field == USFIELD_NAME)
+ (void) printf(first?"%-*s":" %-*s", width[i],
+ col);
+ else
+ (void) printf(first?"%*s":" %*s", width[i],
+ col);
+ first = B_FALSE;
+ }
+ (void) printf("\n");
+ }
+
+ for (node = uu_avl_first(avl); node != NULL;
+ node = uu_avl_next(avl, node)) {
+ print_us_node(scripted, parsable, fields, type_width,
+ name_width, used_width, used_width, node);
+ if (rmnode)
+ nvlist_free(node->usn_nvl);
+ }
+}
+
static int
zfs_do_userspace(int argc, char **argv)
{
zfs_handle_t *zhp;
zfs_userquota_prop_t p;
+
+ uu_avl_pool_t *avl_pool;
+ uu_avl_t *avl_tree;
+ uu_avl_walk_t *walk;
+
+ char *cmd;
+ boolean_t scripted = B_FALSE;
+ boolean_t prtnum = B_FALSE;
+ boolean_t parseable = B_FALSE;
+ boolean_t sid2posix = B_FALSE;
int error;
+ int c;
+ zfs_sort_column_t *default_sortcol = NULL;
+ zfs_sort_column_t *sortcol = NULL;
+ unsigned types = USTYPE_PSX_USR | USTYPE_SMB_USR;
+ unsigned fields = 0;
+ unsigned props = USPROP_USED | USPROP_QUOTA;
+ us_cbdata_t cb;
+ us_node_t *node;
+ boolean_t resort_avl = B_FALSE;
+
+ if (argc < 2)
+ usage(B_FALSE);
- /*
- * Try the python version. If the execv fails, we'll continue
- * and do a simplistic implementation.
- */
- (void) execv(pypath, argv-1);
+ cmd = argv[0];
+ if (0 == strcmp(cmd, "groupspace"))
+ /* toggle default group types */
+ types = USTYPE_PSX_GRP | USTYPE_SMB_GRP;
+
+ /* check options */
+ while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) {
+ switch (c) {
+ case 'n':
+ prtnum = B_TRUE;
+ break;
+ case 'H':
+ scripted = B_TRUE;
+ break;
+ case 'p':
+ parseable = B_TRUE;
+ break;
+ case 'o':
+ if (parsefields(&fields, us_field_names, us_field_bits,
+ 4) != 0)
+ return (1);
+ break;
+ case 's':
+ if (zfs_add_sort_column(&sortcol, optarg,
+ B_FALSE) != 0) {
+ (void) fprintf(stderr,
+ gettext("invalid property '%s'\n"), optarg);
+ usage(B_FALSE);
+ }
+ break;
+ case 'S':
+ if (zfs_add_sort_column(&sortcol, optarg,
+ B_TRUE) != 0) {
+ (void) fprintf(stderr,
+ gettext("invalid property '%s'\n"), optarg);
+ usage(B_FALSE);
+ }
+ break;
+ case 't':
+ if (parsefields(&types, type_names, type_bits, 5))
+ return (1);
+ break;
+ case 'i':
+ sid2posix = B_TRUE;
+ break;
+ case ':':
+ (void) fprintf(stderr, gettext("missing argument for "
+ "'%c' option\n"), optopt);
+ usage(B_FALSE);
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
- (void) fprintf(stderr, "internal error: %s not found\n"
- "falling back on built-in implementation, "
- "some features will not work\n", pypath);
- (void) fprintf(stderr, " install sysutils/py-zfs port to correct this\n");
+ /* ok, now we have sorted by default colums (type,name) avl tree */
+ if (sortcol) {
+ zfs_sort_column_t *sc;
+ for (sc = sortcol; sc; sc = sc->sc_next) {
+ if (sc->sc_prop == ZFS_PROP_QUOTA) {
+ resort_avl = B_TRUE;
+ break;
+ }
+ }
+ }
+
+ if (!fields)
+ fields = USFIELD_ALL;
if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL)
return (1);
- (void) printf("PROP TYPE NAME VALUE\n");
+ if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t),
+ offsetof(us_node_t, usn_avlnode),
+ us_compare, UU_DEFAULT)) == NULL)
+ nomem();
+ if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL)
+ nomem();
+
+ if (sortcol && !resort_avl)
+ cb.cb_sortcol = sortcol;
+ else {
+ (void) zfs_add_sort_column(&default_sortcol, "type", B_FALSE);
+ (void) zfs_add_sort_column(&default_sortcol, "name", B_FALSE);
+ cb.cb_sortcol = default_sortcol;
+ }
+ cb.cb_numname = prtnum;
+ cb.cb_nicenum = !parseable;
+ cb.cb_avl_pool = avl_pool;
+ cb.cb_avl = avl_tree;
+ cb.cb_sid2posix = sid2posix;
+ cb.cb_max_typelen = strlen(gettext("TYPE"));
+ cb.cb_max_namelen = strlen(gettext("NAME"));
+ cb.cb_max_usedlen = strlen(gettext("USED"));
+ cb.cb_max_quotalen = strlen(gettext("QUOTA"));
for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) {
- error = zfs_userspace(zhp, p, userspace_cb, &p);
+ if (!usprop_check(p, types, props))
+ continue;
+
+ cb.cb_prop = p;
+ error = zfs_userspace(zhp, p, userspace_cb, &cb);
+
if (error)
break;
}
+
+ if (resort_avl) {
+ us_node_t *node;
+ us_node_t *rmnode;
+ uu_list_pool_t *listpool;
+ uu_list_t *list;
+ uu_avl_index_t idx = 0;
+ uu_list_index_t idx2 = 0;
+ listpool = uu_list_pool_create("tmplist", sizeof (us_node_t),
+ offsetof(us_node_t, usn_listnode), NULL,
+ UU_DEFAULT);
+ list = uu_list_create(listpool, NULL, UU_DEFAULT);
+
+ node = uu_avl_first(avl_tree);
+ uu_list_node_init(node, &node->usn_listnode, listpool);
+ while (node != NULL) {
+ rmnode = node;
+ node = uu_avl_next(avl_tree, node);
+ uu_avl_remove(avl_tree, rmnode);
+ if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) {
+ uu_list_insert(list, rmnode, idx2);
+ }
+ }
+
+ for (node = uu_list_first(list); node != NULL;
+ node = uu_list_next(list, node)) {
+ us_sort_info_t sortinfo = { sortcol, cb.cb_numname };
+ if (uu_avl_find(avl_tree, node, &sortinfo, &idx) ==
+ NULL)
+ uu_avl_insert(avl_tree, node, idx);
+ }
+
+ uu_list_destroy(list);
+ }
+
+ /* print & free node`s nvlist memory */
+ print_us(scripted, parseable, fields, cb.cb_max_typelen,
+ cb.cb_max_namelen, cb.cb_max_usedlen,
+ cb.cb_max_quotalen, B_TRUE, cb.cb_avl);
+
+ if (sortcol)
+ zfs_free_sort_columns(sortcol);
+ zfs_free_sort_columns(default_sortcol);
+
+ /*
+ * Finally, clean up the AVL tree.
+ */
+ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL)
+ nomem();
+
+ while ((node = uu_avl_walk_next(walk)) != NULL) {
+ uu_avl_remove(cb.cb_avl, node);
+ free(node);
+ }
+
+ uu_avl_walk_end(walk);
+ uu_avl_destroy(avl_tree);
+ uu_avl_pool_destroy(avl_pool);
+
return (error);
}
@@ -1756,11 +2574,11 @@ zfs_do_userspace(int argc, char **argv)
* [-s property [-s property]...] [-S property [-S property]...]
* <dataset> ...
*
- * -r Recurse over all children
- * -d Limit recursion by depth.
- * -H Scripted mode; elide headers and separate columns by tabs
- * -o Control which fields to display.
- * -t Control which object types to display.
+ * -r Recurse over all children
+ * -d Limit recursion by depth.
+ * -H Scripted mode; elide headers and separate columns by tabs
+ * -o Control which fields to display.
+ * -t Control which object types to display.
* -s Specify sort columns, descending order.
* -S Specify sort columns, ascending order.
*
@@ -2157,9 +2975,9 @@ zfs_do_promote(int argc, char **argv)
/*
* zfs rollback [-rRf] <snapshot>
*
- * -r Delete any intervening snapshots before doing rollback
- * -R Delete any snapshots and their clones
- * -f ignored for backwards compatability
+ * -r Delete any intervening snapshots before doing rollback
+ * -R Delete any snapshots and their clones
+ * -f ignored for backwards compatability
*
* Given a filesystem, rollback to a specific snapshot, discarding any changes
* since then and making it the active dataset. If more recent snapshots exist,
@@ -2420,11 +3238,8 @@ zfs_do_snapshot(int argc, char **argv)
char c;
nvlist_t *props;
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
while ((c = getopt(argc, argv, "ro:")) != -1) {
@@ -2469,8 +3284,8 @@ usage:
}
/*
- * zfs send [-v] -R [-i|-I <@snap>] <fs@snap>
- * zfs send [-v] [-i|-I <@snap>] <fs@snap>
+ * zfs send [-vDp] -R [-i|-I <@snap>] <fs@snap>
+ * zfs send [-vDp] [-i|-I <@snap>] <fs@snap>
*
* Send a backup stream to stdout.
*/
@@ -2481,14 +3296,13 @@ zfs_do_send(int argc, char **argv)
char *toname = NULL;
char *cp;
zfs_handle_t *zhp;
- boolean_t doall = B_FALSE;
- boolean_t replicate = B_FALSE;
- boolean_t fromorigin = B_FALSE;
- boolean_t verbose = B_FALSE;
+ sendflags_t flags = { 0 };
int c, err;
+ nvlist_t *dbgnv;
+ boolean_t extraverbose = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, ":i:I:Rv")) != -1) {
+ while ((c = getopt(argc, argv, ":i:I:RDpv")) != -1) {
switch (c) {
case 'i':
if (fromname)
@@ -2499,13 +3313,21 @@ zfs_do_send(int argc, char **argv)
if (fromname)
usage(B_FALSE);
fromname = optarg;
- doall = B_TRUE;
+ flags.doall = B_TRUE;
break;
case 'R':
- replicate = B_TRUE;
+ flags.replicate = B_TRUE;
+ break;
+ case 'p':
+ flags.props = B_TRUE;
break;
case 'v':
- verbose = B_TRUE;
+ if (flags.verbose)
+ extraverbose = B_TRUE;
+ flags.verbose = B_TRUE;
+ break;
+ case 'D':
+ flags.dedup = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -2565,7 +3387,7 @@ zfs_do_send(int argc, char **argv)
if (strcmp(origin, fromname) == 0) {
fromname = NULL;
- fromorigin = B_TRUE;
+ flags.fromorigin = B_TRUE;
} else {
*cp = '\0';
if (cp != fromname && strcmp(argv[0], fromname)) {
@@ -2583,18 +3405,29 @@ zfs_do_send(int argc, char **argv)
}
}
- if (replicate && fromname == NULL)
- doall = B_TRUE;
+ if (flags.replicate && fromname == NULL)
+ flags.doall = B_TRUE;
+
+ err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0,
+ extraverbose ? &dbgnv : NULL);
- err = zfs_send(zhp, fromname, toname, replicate, doall, fromorigin,
- verbose, STDOUT_FILENO);
+ if (extraverbose) {
+ /*
+ * dump_nvlist prints to stdout, but that's been
+ * redirected to a file. Make it print to stderr
+ * instead.
+ */
+ (void) dup2(STDERR_FILENO, STDOUT_FILENO);
+ dump_nvlist(dbgnv, 0);
+ nvlist_free(dbgnv);
+ }
zfs_close(zhp);
return (err != 0);
}
/*
- * zfs receive [-dnvF] <fs@snap>
+ * zfs receive [-vnFu] [-d | -e] <fs@snap>
*
* Restore a backup stream from stdin.
*/
@@ -2602,15 +3435,18 @@ static int
zfs_do_receive(int argc, char **argv)
{
int c, err;
- recvflags_t flags;
+ recvflags_t flags = { 0 };
- bzero(&flags, sizeof (recvflags_t));
/* check options */
- while ((c = getopt(argc, argv, ":dnuvF")) != -1) {
+ while ((c = getopt(argc, argv, ":denuvF")) != -1) {
switch (c) {
case 'd':
flags.isprefix = B_TRUE;
break;
+ case 'e':
+ flags.isprefix = B_TRUE;
+ flags.istail = B_TRUE;
+ break;
case 'n':
flags.dryrun = B_TRUE;
break;
@@ -2661,13 +3497,1652 @@ zfs_do_receive(int argc, char **argv)
return (err != 0);
}
-typedef struct get_all_cbdata {
- zfs_handle_t **cb_handles;
- size_t cb_alloc;
- size_t cb_used;
- uint_t cb_types;
- boolean_t cb_verbose;
-} get_all_cbdata_t;
+/*
+ * allow/unallow stuff
+ */
+/* copied from zfs/sys/dsl_deleg.h */
+#define ZFS_DELEG_PERM_CREATE "create"
+#define ZFS_DELEG_PERM_DESTROY "destroy"
+#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
+#define ZFS_DELEG_PERM_ROLLBACK "rollback"
+#define ZFS_DELEG_PERM_CLONE "clone"
+#define ZFS_DELEG_PERM_PROMOTE "promote"
+#define ZFS_DELEG_PERM_RENAME "rename"
+#define ZFS_DELEG_PERM_MOUNT "mount"
+#define ZFS_DELEG_PERM_SHARE "share"
+#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_RECEIVE "receive"
+#define ZFS_DELEG_PERM_ALLOW "allow"
+#define ZFS_DELEG_PERM_USERPROP "userprop"
+#define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? */
+#define ZFS_DELEG_PERM_USERQUOTA "userquota"
+#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
+#define ZFS_DELEG_PERM_USERUSED "userused"
+#define ZFS_DELEG_PERM_GROUPUSED "groupused"
+#define ZFS_DELEG_PERM_HOLD "hold"
+#define ZFS_DELEG_PERM_RELEASE "release"
+#define ZFS_DELEG_PERM_DIFF "diff"
+
+#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE
+
+static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
+ { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW },
+ { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE },
+ { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE },
+ { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY },
+ { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF},
+ { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD },
+ { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT },
+ { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE },
+ { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE },
+ { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE },
+ { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
+ { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
+ { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
+ { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
+ { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
+
+ { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
+ { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
+ { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
+ { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
+ { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
+ { NULL, ZFS_DELEG_NOTE_NONE }
+};
+
+/* permission structure */
+typedef struct deleg_perm {
+ zfs_deleg_who_type_t dp_who_type;
+ const char *dp_name;
+ boolean_t dp_local;
+ boolean_t dp_descend;
+} deleg_perm_t;
+
+/* */
+typedef struct deleg_perm_node {
+ deleg_perm_t dpn_perm;
+
+ uu_avl_node_t dpn_avl_node;
+} deleg_perm_node_t;
+
+typedef struct fs_perm fs_perm_t;
+
+/* permissions set */
+typedef struct who_perm {
+ zfs_deleg_who_type_t who_type;
+ const char *who_name; /* id */
+ char who_ug_name[256]; /* user/group name */
+ fs_perm_t *who_fsperm; /* uplink */
+
+ uu_avl_t *who_deleg_perm_avl; /* permissions */
+} who_perm_t;
+
+/* */
+typedef struct who_perm_node {
+ who_perm_t who_perm;
+ uu_avl_node_t who_avl_node;
+} who_perm_node_t;
+
+typedef struct fs_perm_set fs_perm_set_t;
+/* fs permissions */
+struct fs_perm {
+ const char *fsp_name;
+
+ uu_avl_t *fsp_sc_avl; /* sets,create */
+ uu_avl_t *fsp_uge_avl; /* user,group,everyone */
+
+ fs_perm_set_t *fsp_set; /* uplink */
+};
+
+/* */
+typedef struct fs_perm_node {
+ fs_perm_t fspn_fsperm;
+ uu_avl_t *fspn_avl;
+
+ uu_list_node_t fspn_list_node;
+} fs_perm_node_t;
+
+/* top level structure */
+struct fs_perm_set {
+ uu_list_pool_t *fsps_list_pool;
+ uu_list_t *fsps_list; /* list of fs_perms */
+
+ uu_avl_pool_t *fsps_named_set_avl_pool;
+ uu_avl_pool_t *fsps_who_perm_avl_pool;
+ uu_avl_pool_t *fsps_deleg_perm_avl_pool;
+};
+
+static inline const char *
+deleg_perm_type(zfs_deleg_note_t note)
+{
+ /* subcommands */
+ switch (note) {
+ /* SUBCOMMANDS */
+ /* OTHER */
+ case ZFS_DELEG_NOTE_GROUPQUOTA:
+ case ZFS_DELEG_NOTE_GROUPUSED:
+ case ZFS_DELEG_NOTE_USERPROP:
+ case ZFS_DELEG_NOTE_USERQUOTA:
+ case ZFS_DELEG_NOTE_USERUSED:
+ /* other */
+ return (gettext("other"));
+ default:
+ return (gettext("subcommand"));
+ }
+}
+
+static int inline
+who_type2weight(zfs_deleg_who_type_t who_type)
+{
+ int res;
+ switch (who_type) {
+ case ZFS_DELEG_NAMED_SET_SETS:
+ case ZFS_DELEG_NAMED_SET:
+ res = 0;
+ break;
+ case ZFS_DELEG_CREATE_SETS:
+ case ZFS_DELEG_CREATE:
+ res = 1;
+ break;
+ case ZFS_DELEG_USER_SETS:
+ case ZFS_DELEG_USER:
+ res = 2;
+ break;
+ case ZFS_DELEG_GROUP_SETS:
+ case ZFS_DELEG_GROUP:
+ res = 3;
+ break;
+ case ZFS_DELEG_EVERYONE_SETS:
+ case ZFS_DELEG_EVERYONE:
+ res = 4;
+ break;
+ default:
+ res = -1;
+ }
+
+ return (res);
+}
+
+/* ARGSUSED */
+static int
+who_perm_compare(const void *larg, const void *rarg, void *unused)
+{
+ const who_perm_node_t *l = larg;
+ const who_perm_node_t *r = rarg;
+ zfs_deleg_who_type_t ltype = l->who_perm.who_type;
+ zfs_deleg_who_type_t rtype = r->who_perm.who_type;
+ int lweight = who_type2weight(ltype);
+ int rweight = who_type2weight(rtype);
+ int res = lweight - rweight;
+ if (res == 0)
+ res = strncmp(l->who_perm.who_name, r->who_perm.who_name,
+ ZFS_MAX_DELEG_NAME-1);
+
+ if (res == 0)
+ return (0);
+ if (res > 0)
+ return (1);
+ else
+ return (-1);
+}
+
+/* ARGSUSED */
+static int
+deleg_perm_compare(const void *larg, const void *rarg, void *unused)
+{
+ const deleg_perm_node_t *l = larg;
+ const deleg_perm_node_t *r = rarg;
+ int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name,
+ ZFS_MAX_DELEG_NAME-1);
+
+ if (res == 0)
+ return (0);
+
+ if (res > 0)
+ return (1);
+ else
+ return (-1);
+}
+
+static inline void
+fs_perm_set_init(fs_perm_set_t *fspset)
+{
+ bzero(fspset, sizeof (fs_perm_set_t));
+
+ if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool",
+ sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node),
+ NULL, UU_DEFAULT)) == NULL)
+ nomem();
+ if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL,
+ UU_DEFAULT)) == NULL)
+ nomem();
+
+ if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create(
+ "named_set_avl_pool", sizeof (who_perm_node_t), offsetof(
+ who_perm_node_t, who_avl_node), who_perm_compare,
+ UU_DEFAULT)) == NULL)
+ nomem();
+
+ if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create(
+ "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof(
+ who_perm_node_t, who_avl_node), who_perm_compare,
+ UU_DEFAULT)) == NULL)
+ nomem();
+
+ if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create(
+ "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof(
+ deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT))
+ == NULL)
+ nomem();
+}
+
+static inline void fs_perm_fini(fs_perm_t *);
+static inline void who_perm_fini(who_perm_t *);
+
+static inline void
+fs_perm_set_fini(fs_perm_set_t *fspset)
+{
+ fs_perm_node_t *node = uu_list_first(fspset->fsps_list);
+
+ while (node != NULL) {
+ fs_perm_node_t *next_node =
+ uu_list_next(fspset->fsps_list, node);
+ fs_perm_t *fsperm = &node->fspn_fsperm;
+ fs_perm_fini(fsperm);
+ uu_list_remove(fspset->fsps_list, node);
+ free(node);
+ node = next_node;
+ }
+
+ uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool);
+ uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool);
+ uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool);
+}
+
+static inline void
+deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type,
+ const char *name)
+{
+ deleg_perm->dp_who_type = type;
+ deleg_perm->dp_name = name;
+}
+
+static inline void
+who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm,
+ zfs_deleg_who_type_t type, const char *name)
+{
+ uu_avl_pool_t *pool;
+ pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool;
+
+ bzero(who_perm, sizeof (who_perm_t));
+
+ if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL,
+ UU_DEFAULT)) == NULL)
+ nomem();
+
+ who_perm->who_type = type;
+ who_perm->who_name = name;
+ who_perm->who_fsperm = fsperm;
+}
+
+static inline void
+who_perm_fini(who_perm_t *who_perm)
+{
+ deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl);
+
+ while (node != NULL) {
+ deleg_perm_node_t *next_node =
+ uu_avl_next(who_perm->who_deleg_perm_avl, node);
+
+ uu_avl_remove(who_perm->who_deleg_perm_avl, node);
+ free(node);
+ node = next_node;
+ }
+
+ uu_avl_destroy(who_perm->who_deleg_perm_avl);
+}
+
+static inline void
+fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname)
+{
+ uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool;
+ uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool;
+
+ bzero(fsperm, sizeof (fs_perm_t));
+
+ if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT))
+ == NULL)
+ nomem();
+
+ if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT))
+ == NULL)
+ nomem();
+
+ fsperm->fsp_set = fspset;
+ fsperm->fsp_name = fsname;
+}
+
+static inline void
+fs_perm_fini(fs_perm_t *fsperm)
+{
+ who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl);
+ while (node != NULL) {
+ who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl,
+ node);
+ who_perm_t *who_perm = &node->who_perm;
+ who_perm_fini(who_perm);
+ uu_avl_remove(fsperm->fsp_sc_avl, node);
+ free(node);
+ node = next_node;
+ }
+
+ node = uu_avl_first(fsperm->fsp_uge_avl);
+ while (node != NULL) {
+ who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl,
+ node);
+ who_perm_t *who_perm = &node->who_perm;
+ who_perm_fini(who_perm);
+ uu_avl_remove(fsperm->fsp_uge_avl, node);
+ free(node);
+ node = next_node;
+ }
+
+ uu_avl_destroy(fsperm->fsp_sc_avl);
+ uu_avl_destroy(fsperm->fsp_uge_avl);
+}
+
+static void inline
+set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node,
+ zfs_deleg_who_type_t who_type, const char *name, char locality)
+{
+ uu_avl_index_t idx = 0;
+
+ deleg_perm_node_t *found_node = NULL;
+ deleg_perm_t *deleg_perm = &node->dpn_perm;
+
+ deleg_perm_init(deleg_perm, who_type, name);
+
+ if ((found_node = uu_avl_find(avl, node, NULL, &idx))
+ == NULL)
+ uu_avl_insert(avl, node, idx);
+ else {
+ node = found_node;
+ deleg_perm = &node->dpn_perm;
+ }
+
+
+ switch (locality) {
+ case ZFS_DELEG_LOCAL:
+ deleg_perm->dp_local = B_TRUE;
+ break;
+ case ZFS_DELEG_DESCENDENT:
+ deleg_perm->dp_descend = B_TRUE;
+ break;
+ case ZFS_DELEG_NA:
+ break;
+ default:
+ assert(B_FALSE); /* invalid locality */
+ }
+}
+
+static inline int
+parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality)
+{
+ nvpair_t *nvp = NULL;
+ fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set;
+ uu_avl_t *avl = who_perm->who_deleg_perm_avl;
+ zfs_deleg_who_type_t who_type = who_perm->who_type;
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ const char *name = nvpair_name(nvp);
+ data_type_t type = nvpair_type(nvp);
+ uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool;
+ deleg_perm_node_t *node =
+ safe_malloc(sizeof (deleg_perm_node_t));
+
+ assert(type == DATA_TYPE_BOOLEAN);
+
+ uu_avl_node_init(node, &node->dpn_avl_node, avl_pool);
+ set_deleg_perm_node(avl, node, who_type, name, locality);
+ }
+
+ return (0);
+}
+
+static inline int
+parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl)
+{
+ nvpair_t *nvp = NULL;
+ fs_perm_set_t *fspset = fsperm->fsp_set;
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ nvlist_t *nvl2 = NULL;
+ const char *name = nvpair_name(nvp);
+ uu_avl_t *avl = NULL;
+ uu_avl_pool_t *avl_pool;
+ zfs_deleg_who_type_t perm_type = name[0];
+ char perm_locality = name[1];
+ const char *perm_name = name + 3;
+ boolean_t is_set = B_TRUE;
+ who_perm_t *who_perm = NULL;
+
+ assert('$' == name[2]);
+
+ if (nvpair_value_nvlist(nvp, &nvl2) != 0)
+ return (-1);
+
+ switch (perm_type) {
+ case ZFS_DELEG_CREATE:
+ case ZFS_DELEG_CREATE_SETS:
+ case ZFS_DELEG_NAMED_SET:
+ case ZFS_DELEG_NAMED_SET_SETS:
+ avl_pool = fspset->fsps_named_set_avl_pool;
+ avl = fsperm->fsp_sc_avl;
+ break;
+ case ZFS_DELEG_USER:
+ case ZFS_DELEG_USER_SETS:
+ case ZFS_DELEG_GROUP:
+ case ZFS_DELEG_GROUP_SETS:
+ case ZFS_DELEG_EVERYONE:
+ case ZFS_DELEG_EVERYONE_SETS:
+ avl_pool = fspset->fsps_who_perm_avl_pool;
+ avl = fsperm->fsp_uge_avl;
+ break;
+ }
+
+ if (is_set) {
+ who_perm_node_t *found_node = NULL;
+ who_perm_node_t *node = safe_malloc(
+ sizeof (who_perm_node_t));
+ who_perm = &node->who_perm;
+ uu_avl_index_t idx = 0;
+
+ uu_avl_node_init(node, &node->who_avl_node, avl_pool);
+ who_perm_init(who_perm, fsperm, perm_type, perm_name);
+
+ if ((found_node = uu_avl_find(avl, node, NULL, &idx))
+ == NULL) {
+ if (avl == fsperm->fsp_uge_avl) {
+ uid_t rid = 0;
+ struct passwd *p = NULL;
+ struct group *g = NULL;
+ const char *nice_name = NULL;
+
+ switch (perm_type) {
+ case ZFS_DELEG_USER_SETS:
+ case ZFS_DELEG_USER:
+ rid = atoi(perm_name);
+ p = getpwuid(rid);
+ if (p)
+ nice_name = p->pw_name;
+ break;
+ case ZFS_DELEG_GROUP_SETS:
+ case ZFS_DELEG_GROUP:
+ rid = atoi(perm_name);
+ g = getgrgid(rid);
+ if (g)
+ nice_name = g->gr_name;
+ break;
+ }
+
+ if (nice_name != NULL)
+ (void) strlcpy(
+ node->who_perm.who_ug_name,
+ nice_name, 256);
+ }
+
+ uu_avl_insert(avl, node, idx);
+ } else {
+ node = found_node;
+ who_perm = &node->who_perm;
+ }
+ }
+
+ (void) parse_who_perm(who_perm, nvl2, perm_locality);
+ }
+
+ return (0);
+}
+
+static inline int
+parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl)
+{
+ nvpair_t *nvp = NULL;
+ uu_avl_index_t idx = 0;
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ nvlist_t *nvl2 = NULL;
+ const char *fsname = nvpair_name(nvp);
+ data_type_t type = nvpair_type(nvp);
+ fs_perm_t *fsperm = NULL;
+ fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t));
+ if (node == NULL)
+ nomem();
+
+ fsperm = &node->fspn_fsperm;
+
+ assert(DATA_TYPE_NVLIST == type);
+
+ uu_list_node_init(node, &node->fspn_list_node,
+ fspset->fsps_list_pool);
+
+ idx = uu_list_numnodes(fspset->fsps_list);
+ fs_perm_init(fsperm, fspset, fsname);
+
+ if (nvpair_value_nvlist(nvp, &nvl2) != 0)
+ return (-1);
+
+ (void) parse_fs_perm(fsperm, nvl2);
+
+ uu_list_insert(fspset->fsps_list, node, idx);
+ }
+
+ return (0);
+}
+
+static inline const char *
+deleg_perm_comment(zfs_deleg_note_t note)
+{
+ const char *str = "";
+
+ /* subcommands */
+ switch (note) {
+ /* SUBCOMMANDS */
+ case ZFS_DELEG_NOTE_ALLOW:
+ str = gettext("Must also have the permission that is being"
+ "\n\t\t\t\tallowed");
+ break;
+ case ZFS_DELEG_NOTE_CLONE:
+ str = gettext("Must also have the 'create' ability and 'mount'"
+ "\n\t\t\t\tability in the origin file system");
+ break;
+ case ZFS_DELEG_NOTE_CREATE:
+ str = gettext("Must also have the 'mount' ability");
+ break;
+ case ZFS_DELEG_NOTE_DESTROY:
+ str = gettext("Must also have the 'mount' ability");
+ break;
+ case ZFS_DELEG_NOTE_DIFF:
+ str = gettext("Allows lookup of paths within a dataset;"
+ "\n\t\t\t\tgiven an object number. Ordinary users need this"
+ "\n\t\t\t\tin order to use zfs diff");
+ break;
+ case ZFS_DELEG_NOTE_HOLD:
+ str = gettext("Allows adding a user hold to a snapshot");
+ break;
+ case ZFS_DELEG_NOTE_MOUNT:
+ str = gettext("Allows mount/umount of ZFS datasets");
+ break;
+ case ZFS_DELEG_NOTE_PROMOTE:
+ str = gettext("Must also have the 'mount'\n\t\t\t\tand"
+ " 'promote' ability in the origin file system");
+ break;
+ case ZFS_DELEG_NOTE_RECEIVE:
+ str = gettext("Must also have the 'mount' and 'create'"
+ " ability");
+ break;
+ case ZFS_DELEG_NOTE_RELEASE:
+ str = gettext("Allows releasing a user hold which\n\t\t\t\t"
+ "might destroy the snapshot");
+ break;
+ case ZFS_DELEG_NOTE_RENAME:
+ str = gettext("Must also have the 'mount' and 'create'"
+ "\n\t\t\t\tability in the new parent");
+ break;
+ case ZFS_DELEG_NOTE_ROLLBACK:
+ str = gettext("");
+ break;
+ case ZFS_DELEG_NOTE_SEND:
+ str = gettext("");
+ break;
+ case ZFS_DELEG_NOTE_SHARE:
+ str = gettext("Allows sharing file systems over NFS or SMB"
+ "\n\t\t\t\tprotocols");
+ break;
+ case ZFS_DELEG_NOTE_SNAPSHOT:
+ str = gettext("");
+ break;
+/*
+ * case ZFS_DELEG_NOTE_VSCAN:
+ * str = gettext("");
+ * break;
+ */
+ /* OTHER */
+ case ZFS_DELEG_NOTE_GROUPQUOTA:
+ str = gettext("Allows accessing any groupquota@... property");
+ break;
+ case ZFS_DELEG_NOTE_GROUPUSED:
+ str = gettext("Allows reading any groupused@... property");
+ break;
+ case ZFS_DELEG_NOTE_USERPROP:
+ str = gettext("Allows changing any user property");
+ break;
+ case ZFS_DELEG_NOTE_USERQUOTA:
+ str = gettext("Allows accessing any userquota@... property");
+ break;
+ case ZFS_DELEG_NOTE_USERUSED:
+ str = gettext("Allows reading any userused@... property");
+ break;
+ /* other */
+ default:
+ str = "";
+ }
+
+ return (str);
+}
+
+struct allow_opts {
+ boolean_t local;
+ boolean_t descend;
+ boolean_t user;
+ boolean_t group;
+ boolean_t everyone;
+ boolean_t create;
+ boolean_t set;
+ boolean_t recursive; /* unallow only */
+ boolean_t prt_usage;
+
+ boolean_t prt_perms;
+ char *who;
+ char *perms;
+ const char *dataset;
+};
+
+static inline int
+prop_cmp(const void *a, const void *b)
+{
+ const char *str1 = *(const char **)a;
+ const char *str2 = *(const char **)b;
+ return (strcmp(str1, str2));
+}
+
+static void
+allow_usage(boolean_t un, boolean_t requested, const char *msg)
+{
+ const char *opt_desc[] = {
+ "-h", gettext("show this help message and exit"),
+ "-l", gettext("set permission locally"),
+ "-d", gettext("set permission for descents"),
+ "-u", gettext("set permission for user"),
+ "-g", gettext("set permission for group"),
+ "-e", gettext("set permission for everyone"),
+ "-c", gettext("set create time permission"),
+ "-s", gettext("define permission set"),
+ /* unallow only */
+ "-r", gettext("remove permissions recursively"),
+ };
+ size_t unallow_size = sizeof (opt_desc) / sizeof (char *);
+ size_t allow_size = unallow_size - 2;
+ const char *props[ZFS_NUM_PROPS];
+ int i;
+ size_t count = 0;
+ FILE *fp = requested ? stdout : stderr;
+ zprop_desc_t *pdtbl = zfs_prop_get_table();
+ const char *fmt = gettext("%-16s %-14s\t%s\n");
+
+ (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW :
+ HELP_ALLOW));
+ (void) fprintf(fp, gettext("Options:\n"));
+ for (i = 0; i < (un ? unallow_size : allow_size); i++) {
+ const char *opt = opt_desc[i++];
+ const char *optdsc = opt_desc[i];
+ (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc);
+ }
+
+ (void) fprintf(fp, gettext("\nThe following permissions are "
+ "supported:\n\n"));
+ (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"),
+ gettext("NOTES"));
+ for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) {
+ const char *perm_name = zfs_deleg_perm_tbl[i].z_perm;
+ zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note;
+ const char *perm_type = deleg_perm_type(perm_note);
+ const char *perm_comment = deleg_perm_comment(perm_note);
+ (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment);
+ }
+
+ for (i = 0; i < ZFS_NUM_PROPS; i++) {
+ zprop_desc_t *pd = &pdtbl[i];
+ if (pd->pd_visible != B_TRUE)
+ continue;
+
+ if (pd->pd_attr == PROP_READONLY)
+ continue;
+
+ props[count++] = pd->pd_name;
+ }
+ props[count] = NULL;
+
+ qsort(props, count, sizeof (char *), prop_cmp);
+
+ for (i = 0; i < count; i++)
+ (void) fprintf(fp, fmt, props[i], gettext("property"), "");
+
+ if (msg != NULL)
+ (void) fprintf(fp, gettext("\nzfs: error: %s"), msg);
+
+ exit(requested ? 0 : 2);
+}
+
+static inline const char *
+munge_args(int argc, char **argv, boolean_t un, size_t expected_argc,
+ char **permsp)
+{
+ if (un && argc == expected_argc - 1)
+ *permsp = NULL;
+ else if (argc == expected_argc)
+ *permsp = argv[argc - 2];
+ else
+ allow_usage(un, B_FALSE,
+ gettext("wrong number of parameters\n"));
+
+ return (argv[argc - 1]);
+}
+
+static void
+parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts)
+{
+ int uge_sum = opts->user + opts->group + opts->everyone;
+ int csuge_sum = opts->create + opts->set + uge_sum;
+ int ldcsuge_sum = csuge_sum + opts->local + opts->descend;
+ int all_sum = un ? ldcsuge_sum + opts->recursive : ldcsuge_sum;
+
+ if (uge_sum > 1)
+ allow_usage(un, B_FALSE,
+ gettext("-u, -g, and -e are mutually exclusive\n"));
+
+ if (opts->prt_usage)
+ if (argc == 0 && all_sum == 0)
+ allow_usage(un, B_TRUE, NULL);
+ else
+ usage(B_FALSE);
+
+ if (opts->set) {
+ if (csuge_sum > 1)
+ allow_usage(un, B_FALSE,
+ gettext("invalid options combined with -s\n"));
+
+ opts->dataset = munge_args(argc, argv, un, 3, &opts->perms);
+ if (argv[0][0] != '@')
+ allow_usage(un, B_FALSE,
+ gettext("invalid set name: missing '@' prefix\n"));
+ opts->who = argv[0];
+ } else if (opts->create) {
+ if (ldcsuge_sum > 1)
+ allow_usage(un, B_FALSE,
+ gettext("invalid options combined with -c\n"));
+ opts->dataset = munge_args(argc, argv, un, 2, &opts->perms);
+ } else if (opts->everyone) {
+ if (csuge_sum > 1)
+ allow_usage(un, B_FALSE,
+ gettext("invalid options combined with -e\n"));
+ opts->dataset = munge_args(argc, argv, un, 2, &opts->perms);
+ } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone")
+ == 0) {
+ opts->everyone = B_TRUE;
+ argc--;
+ argv++;
+ opts->dataset = munge_args(argc, argv, un, 2, &opts->perms);
+ } else if (argc == 1) {
+ opts->prt_perms = B_TRUE;
+ opts->dataset = argv[argc-1];
+ } else {
+ opts->dataset = munge_args(argc, argv, un, 3, &opts->perms);
+ opts->who = argv[0];
+ }
+
+ if (!opts->local && !opts->descend) {
+ opts->local = B_TRUE;
+ opts->descend = B_TRUE;
+ }
+}
+
+static void
+store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend,
+ const char *who, char *perms, nvlist_t *top_nvl)
+{
+ int i;
+ char ld[2] = { '\0', '\0' };
+ char who_buf[ZFS_MAXNAMELEN+32];
+ char base_type;
+ char set_type;
+ nvlist_t *base_nvl = NULL;
+ nvlist_t *set_nvl = NULL;
+ nvlist_t *nvl;
+
+ if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
+ if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
+
+ switch (type) {
+ case ZFS_DELEG_NAMED_SET_SETS:
+ case ZFS_DELEG_NAMED_SET:
+ set_type = ZFS_DELEG_NAMED_SET_SETS;
+ base_type = ZFS_DELEG_NAMED_SET;
+ ld[0] = ZFS_DELEG_NA;
+ break;
+ case ZFS_DELEG_CREATE_SETS:
+ case ZFS_DELEG_CREATE:
+ set_type = ZFS_DELEG_CREATE_SETS;
+ base_type = ZFS_DELEG_CREATE;
+ ld[0] = ZFS_DELEG_NA;
+ break;
+ case ZFS_DELEG_USER_SETS:
+ case ZFS_DELEG_USER:
+ set_type = ZFS_DELEG_USER_SETS;
+ base_type = ZFS_DELEG_USER;
+ if (local)
+ ld[0] = ZFS_DELEG_LOCAL;
+ if (descend)
+ ld[1] = ZFS_DELEG_DESCENDENT;
+ break;
+ case ZFS_DELEG_GROUP_SETS:
+ case ZFS_DELEG_GROUP:
+ set_type = ZFS_DELEG_GROUP_SETS;
+ base_type = ZFS_DELEG_GROUP;
+ if (local)
+ ld[0] = ZFS_DELEG_LOCAL;
+ if (descend)
+ ld[1] = ZFS_DELEG_DESCENDENT;
+ break;
+ case ZFS_DELEG_EVERYONE_SETS:
+ case ZFS_DELEG_EVERYONE:
+ set_type = ZFS_DELEG_EVERYONE_SETS;
+ base_type = ZFS_DELEG_EVERYONE;
+ if (local)
+ ld[0] = ZFS_DELEG_LOCAL;
+ if (descend)
+ ld[1] = ZFS_DELEG_DESCENDENT;
+ }
+
+ if (perms != NULL) {
+ char *curr = perms;
+ char *end = curr + strlen(perms);
+
+ while (curr < end) {
+ char *delim = strchr(curr, ',');
+ if (delim == NULL)
+ delim = end;
+ else
+ *delim = '\0';
+
+ if (curr[0] == '@')
+ nvl = set_nvl;
+ else
+ nvl = base_nvl;
+
+ (void) nvlist_add_boolean(nvl, curr);
+ if (delim != end)
+ *delim = ',';
+ curr = delim + 1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ char locality = ld[i];
+ if (locality == 0)
+ continue;
+
+ if (!nvlist_empty(base_nvl)) {
+ if (who != NULL)
+ (void) snprintf(who_buf,
+ sizeof (who_buf), "%c%c$%s",
+ base_type, locality, who);
+ else
+ (void) snprintf(who_buf,
+ sizeof (who_buf), "%c%c$",
+ base_type, locality);
+
+ (void) nvlist_add_nvlist(top_nvl, who_buf,
+ base_nvl);
+ }
+
+
+ if (!nvlist_empty(set_nvl)) {
+ if (who != NULL)
+ (void) snprintf(who_buf,
+ sizeof (who_buf), "%c%c$%s",
+ set_type, locality, who);
+ else
+ (void) snprintf(who_buf,
+ sizeof (who_buf), "%c%c$",
+ set_type, locality);
+
+ (void) nvlist_add_nvlist(top_nvl, who_buf,
+ set_nvl);
+ }
+ }
+ } else {
+ for (i = 0; i < 2; i++) {
+ char locality = ld[i];
+ if (locality == 0)
+ continue;
+
+ if (who != NULL)
+ (void) snprintf(who_buf, sizeof (who_buf),
+ "%c%c$%s", base_type, locality, who);
+ else
+ (void) snprintf(who_buf, sizeof (who_buf),
+ "%c%c$", base_type, locality);
+ (void) nvlist_add_boolean(top_nvl, who_buf);
+
+ if (who != NULL)
+ (void) snprintf(who_buf, sizeof (who_buf),
+ "%c%c$%s", set_type, locality, who);
+ else
+ (void) snprintf(who_buf, sizeof (who_buf),
+ "%c%c$", set_type, locality);
+ (void) nvlist_add_boolean(top_nvl, who_buf);
+ }
+ }
+}
+
+static int
+construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp)
+{
+ if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
+
+ if (opts->set) {
+ store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local,
+ opts->descend, opts->who, opts->perms, *nvlp);
+ } else if (opts->create) {
+ store_allow_perm(ZFS_DELEG_CREATE, opts->local,
+ opts->descend, NULL, opts->perms, *nvlp);
+ } else if (opts->everyone) {
+ store_allow_perm(ZFS_DELEG_EVERYONE, opts->local,
+ opts->descend, NULL, opts->perms, *nvlp);
+ } else {
+ char *curr = opts->who;
+ char *end = curr + strlen(curr);
+
+ while (curr < end) {
+ const char *who;
+ zfs_deleg_who_type_t who_type;
+ char *endch;
+ char *delim = strchr(curr, ',');
+ char errbuf[256];
+ char id[64];
+ struct passwd *p = NULL;
+ struct group *g = NULL;
+
+ uid_t rid;
+ if (delim == NULL)
+ delim = end;
+ else
+ *delim = '\0';
+
+ rid = (uid_t)strtol(curr, &endch, 0);
+ if (opts->user) {
+ who_type = ZFS_DELEG_USER;
+ if (*endch != '\0')
+ p = getpwnam(curr);
+ else
+ p = getpwuid(rid);
+
+ if (p != NULL)
+ rid = p->pw_uid;
+ else {
+ (void) snprintf(errbuf, 256, gettext(
+ "invalid user %s"), curr);
+ allow_usage(un, B_TRUE, errbuf);
+ }
+ } else if (opts->group) {
+ who_type = ZFS_DELEG_GROUP;
+ if (*endch != '\0')
+ g = getgrnam(curr);
+ else
+ g = getgrgid(rid);
+
+ if (g != NULL)
+ rid = g->gr_gid;
+ else {
+ (void) snprintf(errbuf, 256, gettext(
+ "invalid group %s"), curr);
+ allow_usage(un, B_TRUE, errbuf);
+ }
+ } else {
+ if (*endch != '\0') {
+ p = getpwnam(curr);
+ } else {
+ p = getpwuid(rid);
+ }
+
+ if (p == NULL)
+ if (*endch != '\0') {
+ g = getgrnam(curr);
+ } else {
+ g = getgrgid(rid);
+ }
+
+ if (p != NULL) {
+ who_type = ZFS_DELEG_USER;
+ rid = p->pw_uid;
+ } else if (g != NULL) {
+ who_type = ZFS_DELEG_GROUP;
+ rid = g->gr_gid;
+ } else {
+ (void) snprintf(errbuf, 256, gettext(
+ "invalid user/group %s"), curr);
+ allow_usage(un, B_TRUE, errbuf);
+ }
+ }
+
+ (void) sprintf(id, "%u", rid);
+ who = id;
+
+ store_allow_perm(who_type, opts->local,
+ opts->descend, who, opts->perms, *nvlp);
+ curr = delim + 1;
+ }
+ }
+
+ return (0);
+}
+
+static void
+print_set_creat_perms(uu_avl_t *who_avl)
+{
+ const char *sc_title[] = {
+ gettext("Permission sets:\n"),
+ gettext("Create time permissions:\n"),
+ NULL
+ };
+ const char **title_ptr = sc_title;
+ who_perm_node_t *who_node = NULL;
+ int prev_weight = -1;
+
+ for (who_node = uu_avl_first(who_avl); who_node != NULL;
+ who_node = uu_avl_next(who_avl, who_node)) {
+ uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl;
+ zfs_deleg_who_type_t who_type = who_node->who_perm.who_type;
+ const char *who_name = who_node->who_perm.who_name;
+ int weight = who_type2weight(who_type);
+ boolean_t first = B_TRUE;
+ deleg_perm_node_t *deleg_node;
+
+ if (prev_weight != weight) {
+ (void) printf(*title_ptr++);
+ prev_weight = weight;
+ }
+
+ if (who_name == NULL || strnlen(who_name, 1) == 0)
+ (void) printf("\t");
+ else
+ (void) printf("\t%s ", who_name);
+
+ for (deleg_node = uu_avl_first(avl); deleg_node != NULL;
+ deleg_node = uu_avl_next(avl, deleg_node)) {
+ if (first) {
+ (void) printf("%s",
+ deleg_node->dpn_perm.dp_name);
+ first = B_FALSE;
+ } else
+ (void) printf(",%s",
+ deleg_node->dpn_perm.dp_name);
+ }
+
+ (void) printf("\n");
+ }
+}
+
+static void inline
+print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend,
+ const char *title)
+{
+ who_perm_node_t *who_node = NULL;
+ boolean_t prt_title = B_TRUE;
+ uu_avl_walk_t *walk;
+
+ if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL)
+ nomem();
+
+ while ((who_node = uu_avl_walk_next(walk)) != NULL) {
+ const char *who_name = who_node->who_perm.who_name;
+ const char *nice_who_name = who_node->who_perm.who_ug_name;
+ uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl;
+ zfs_deleg_who_type_t who_type = who_node->who_perm.who_type;
+ char delim = ' ';
+ deleg_perm_node_t *deleg_node;
+ boolean_t prt_who = B_TRUE;
+
+ for (deleg_node = uu_avl_first(avl);
+ deleg_node != NULL;
+ deleg_node = uu_avl_next(avl, deleg_node)) {
+ if (local != deleg_node->dpn_perm.dp_local ||
+ descend != deleg_node->dpn_perm.dp_descend)
+ continue;
+
+ if (prt_who) {
+ const char *who = NULL;
+ if (prt_title) {
+ prt_title = B_FALSE;
+ (void) printf(title);
+ }
+
+ switch (who_type) {
+ case ZFS_DELEG_USER_SETS:
+ case ZFS_DELEG_USER:
+ who = gettext("user");
+ if (nice_who_name)
+ who_name = nice_who_name;
+ break;
+ case ZFS_DELEG_GROUP_SETS:
+ case ZFS_DELEG_GROUP:
+ who = gettext("group");
+ if (nice_who_name)
+ who_name = nice_who_name;
+ break;
+ case ZFS_DELEG_EVERYONE_SETS:
+ case ZFS_DELEG_EVERYONE:
+ who = gettext("everyone");
+ who_name = NULL;
+ }
+
+ prt_who = B_FALSE;
+ if (who_name == NULL)
+ (void) printf("\t%s", who);
+ else
+ (void) printf("\t%s %s", who, who_name);
+ }
+
+ (void) printf("%c%s", delim,
+ deleg_node->dpn_perm.dp_name);
+ delim = ',';
+ }
+
+ if (!prt_who)
+ (void) printf("\n");
+ }
+
+ uu_avl_walk_end(walk);
+}
+
+static void
+print_fs_perms(fs_perm_set_t *fspset)
+{
+ fs_perm_node_t *node = NULL;
+ char buf[ZFS_MAXNAMELEN+32];
+ const char *dsname = buf;
+
+ for (node = uu_list_first(fspset->fsps_list); node != NULL;
+ node = uu_list_next(fspset->fsps_list, node)) {
+ uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl;
+ uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl;
+ int left = 0;
+
+ (void) snprintf(buf, ZFS_MAXNAMELEN+32,
+ gettext("---- Permissions on %s "),
+ node->fspn_fsperm.fsp_name);
+ (void) printf(dsname);
+ left = 70 - strlen(buf);
+ while (left-- > 0)
+ (void) printf("-");
+ (void) printf("\n");
+
+ print_set_creat_perms(sc_avl);
+ print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE,
+ gettext("Local permissions:\n"));
+ print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE,
+ gettext("Descendent permissions:\n"));
+ print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE,
+ gettext("Local+Descendent permissions:\n"));
+ }
+}
+
+static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL };
+
+struct deleg_perms {
+ boolean_t un;
+ nvlist_t *nvl;
+};
+
+static int
+set_deleg_perms(zfs_handle_t *zhp, void *data)
+{
+ struct deleg_perms *perms = (struct deleg_perms *)data;
+ zfs_type_t zfs_type = zfs_get_type(zhp);
+
+ if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME)
+ return (0);
+
+ return (zfs_set_fsacl(zhp, perms->un, perms->nvl));
+}
+
+static int
+zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un)
+{
+ zfs_handle_t *zhp;
+ nvlist_t *perm_nvl = NULL;
+ nvlist_t *update_perm_nvl = NULL;
+ int error = 1;
+ int c;
+ struct allow_opts opts = { 0 };
+
+ const char *optstr = un ? "ldugecsrh" : "ldugecsh";
+
+ /* check opts */
+ while ((c = getopt(argc, argv, optstr)) != -1) {
+ switch (c) {
+ case 'l':
+ opts.local = B_TRUE;
+ break;
+ case 'd':
+ opts.descend = B_TRUE;
+ break;
+ case 'u':
+ opts.user = B_TRUE;
+ break;
+ case 'g':
+ opts.group = B_TRUE;
+ break;
+ case 'e':
+ opts.everyone = B_TRUE;
+ break;
+ case 's':
+ opts.set = B_TRUE;
+ break;
+ case 'c':
+ opts.create = B_TRUE;
+ break;
+ case 'r':
+ opts.recursive = B_TRUE;
+ break;
+ case ':':
+ (void) fprintf(stderr, gettext("missing argument for "
+ "'%c' option\n"), optopt);
+ usage(B_FALSE);
+ break;
+ case 'h':
+ opts.prt_usage = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /* check arguments */
+ parse_allow_args(argc, argv, un, &opts);
+
+ /* try to open the dataset */
+ if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM))
+ == NULL) {
+ (void) fprintf(stderr, "Failed to open Dataset *%s*\n",
+ opts.dataset);
+ return (-1);
+ }
+
+ if (zfs_get_fsacl(zhp, &perm_nvl) != 0)
+ goto cleanup2;
+
+ fs_perm_set_init(&fs_perm_set);
+ if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) {
+ (void) fprintf(stderr, "Failed to parse fsacl permissionsn");
+ goto cleanup1;
+ }
+
+ if (opts.prt_perms)
+ print_fs_perms(&fs_perm_set);
+ else {
+ (void) construct_fsacl_list(un, &opts, &update_perm_nvl);
+ if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0)
+ goto cleanup0;
+
+ if (un && opts.recursive) {
+ struct deleg_perms data = { un, update_perm_nvl };
+ if (zfs_iter_filesystems(zhp, set_deleg_perms,
+ &data) != 0)
+ goto cleanup0;
+ }
+ }
+
+ error = 0;
+
+cleanup0:
+ nvlist_free(perm_nvl);
+ if (update_perm_nvl != NULL)
+ nvlist_free(update_perm_nvl);
+cleanup1:
+ fs_perm_set_fini(&fs_perm_set);
+cleanup2:
+ zfs_close(zhp);
+
+ return (error);
+}
+
+/*
+ * zfs allow [-r] [-t] <tag> <snap> ...
+ *
+ * -r Recursively hold
+ * -t Temporary hold (hidden option)
+ *
+ * Apply a user-hold with the given tag to the list of snapshots.
+ */
+static int
+zfs_do_allow(int argc, char **argv)
+{
+ return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE));
+}
+
+/*
+ * zfs unallow [-r] [-t] <tag> <snap> ...
+ *
+ * -r Recursively hold
+ * -t Temporary hold (hidden option)
+ *
+ * Apply a user-hold with the given tag to the list of snapshots.
+ */
+static int
+zfs_do_unallow(int argc, char **argv)
+{
+ return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE));
+}
+
+static int
+zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
+{
+ int errors = 0;
+ int i;
+ const char *tag;
+ boolean_t recursive = B_FALSE;
+ boolean_t temphold = B_FALSE;
+ const char *opts = holding ? "rt" : "r";
+ int c;
+
+ /* check options */
+ while ((c = getopt(argc, argv, opts)) != -1) {
+ switch (c) {
+ case 'r':
+ recursive = B_TRUE;
+ break;
+ case 't':
+ temphold = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /* check number of arguments */
+ if (argc < 2)
+ usage(B_FALSE);
+
+ tag = argv[0];
+ --argc;
+ ++argv;
+
+ if (holding && tag[0] == '.') {
+ /* tags starting with '.' are reserved for libzfs */
+ (void) fprintf(stderr, gettext("tag may not start with '.'\n"));
+ usage(B_FALSE);
+ }
+
+ for (i = 0; i < argc; ++i) {
+ zfs_handle_t *zhp;
+ char parent[ZFS_MAXNAMELEN];
+ const char *delim;
+ char *path = argv[i];
+
+ delim = strchr(path, '@');
+ if (delim == NULL) {
+ (void) fprintf(stderr,
+ gettext("'%s' is not a snapshot\n"), path);
+ ++errors;
+ continue;
+ }
+ (void) strncpy(parent, path, delim - path);
+ parent[delim - path] = '\0';
+
+ zhp = zfs_open(g_zfs, parent,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (zhp == NULL) {
+ ++errors;
+ continue;
+ }
+ if (holding) {
+ if (zfs_hold(zhp, delim+1, tag, recursive,
+ temphold, B_FALSE, -1, 0, 0) != 0)
+ ++errors;
+ } else {
+ if (zfs_release(zhp, delim+1, tag, recursive) != 0)
+ ++errors;
+ }
+ zfs_close(zhp);
+ }
+
+ return (errors != 0);
+}
+
+/*
+ * zfs hold [-r] [-t] <tag> <snap> ...
+ *
+ * -r Recursively hold
+ * -t Temporary hold (hidden option)
+ *
+ * Apply a user-hold with the given tag to the list of snapshots.
+ */
+static int
+zfs_do_hold(int argc, char **argv)
+{
+ return (zfs_do_hold_rele_impl(argc, argv, B_TRUE));
+}
+
+/*
+ * zfs release [-r] <tag> <snap> ...
+ *
+ * -r Recursively release
+ *
+ * Release a user-hold with the given tag from the list of snapshots.
+ */
+static int
+zfs_do_release(int argc, char **argv)
+{
+ return (zfs_do_hold_rele_impl(argc, argv, B_FALSE));
+}
+
+typedef struct holds_cbdata {
+ boolean_t cb_recursive;
+ const char *cb_snapname;
+ nvlist_t **cb_nvlp;
+ size_t cb_max_namelen;
+ size_t cb_max_taglen;
+} holds_cbdata_t;
+
+#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y"
+#define DATETIME_BUF_LEN (32)
+/*
+ *
+ */
+static void
+print_holds(boolean_t scripted, size_t nwidth, size_t tagwidth, nvlist_t *nvl)
+{
+ int i;
+ nvpair_t *nvp = NULL;
+ char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" };
+ const char *col;
+
+ if (!scripted) {
+ for (i = 0; i < 3; i++) {
+ col = gettext(hdr_cols[i]);
+ if (i < 2)
+ (void) printf("%-*s ", i ? tagwidth : nwidth,
+ col);
+ else
+ (void) printf("%s\n", col);
+ }
+ }
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ char *zname = nvpair_name(nvp);
+ nvlist_t *nvl2;
+ nvpair_t *nvp2 = NULL;
+ (void) nvpair_value_nvlist(nvp, &nvl2);
+ while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) {
+ char tsbuf[DATETIME_BUF_LEN];
+ char *tagname = nvpair_name(nvp2);
+ uint64_t val = 0;
+ time_t time;
+ struct tm t;
+ char sep = scripted ? '\t' : ' ';
+ size_t sepnum = scripted ? 1 : 2;
+
+ (void) nvpair_value_uint64(nvp2, &val);
+ time = (time_t)val;
+ (void) localtime_r(&time, &t);
+ (void) strftime(tsbuf, DATETIME_BUF_LEN,
+ gettext(STRFTIME_FMT_STR), &t);
+
+ (void) printf("%-*s%*c%-*s%*c%s\n", nwidth, zname,
+ sepnum, sep, tagwidth, tagname, sepnum, sep, tsbuf);
+ }
+ }
+}
+
+/*
+ * Generic callback function to list a dataset or snapshot.
+ */
+static int
+holds_callback(zfs_handle_t *zhp, void *data)
+{
+ holds_cbdata_t *cbp = data;
+ nvlist_t *top_nvl = *cbp->cb_nvlp;
+ nvlist_t *nvl = NULL;
+ nvpair_t *nvp = NULL;
+ const char *zname = zfs_get_name(zhp);
+ size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN);
+
+ if (cbp->cb_recursive) {
+ const char *snapname;
+ char *delim = strchr(zname, '@');
+ if (delim == NULL)
+ return (0);
+
+ snapname = delim + 1;
+ if (strcmp(cbp->cb_snapname, snapname))
+ return (0);
+ }
+
+ if (zfs_get_holds(zhp, &nvl) != 0)
+ return (-1);
+
+ if (znamelen > cbp->cb_max_namelen)
+ cbp->cb_max_namelen = znamelen;
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ const char *tag = nvpair_name(nvp);
+ size_t taglen = strnlen(tag, MAXNAMELEN);
+ if (taglen > cbp->cb_max_taglen)
+ cbp->cb_max_taglen = taglen;
+ }
+
+ return (nvlist_add_nvlist(top_nvl, zname, nvl));
+}
+
+/*
+ * zfs holds [-r] <snap> ...
+ *
+ * -r Recursively hold
+ */
+static int
+zfs_do_holds(int argc, char **argv)
+{
+ int errors = 0;
+ int c;
+ int i;
+ boolean_t scripted = B_FALSE;
+ boolean_t recursive = B_FALSE;
+ const char *opts = "rH";
+ nvlist_t *nvl;
+
+ int types = ZFS_TYPE_SNAPSHOT;
+ holds_cbdata_t cb = { 0 };
+
+ int limit = 0;
+ int ret;
+ int flags = 0;
+
+ /* check options */
+ while ((c = getopt(argc, argv, opts)) != -1) {
+ switch (c) {
+ case 'r':
+ recursive = B_TRUE;
+ break;
+ case 'H':
+ scripted = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ if (recursive) {
+ types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
+ flags |= ZFS_ITER_RECURSE;
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /* check number of arguments */
+ if (argc < 1)
+ usage(B_FALSE);
+
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
+
+ for (i = 0; i < argc; ++i) {
+ char *snapshot = argv[i];
+ const char *delim;
+ const char *snapname;
+
+ delim = strchr(snapshot, '@');
+ if (delim == NULL) {
+ (void) fprintf(stderr,
+ gettext("'%s' is not a snapshot\n"), snapshot);
+ ++errors;
+ continue;
+ }
+ snapname = delim + 1;
+ if (recursive)
+ snapshot[delim - snapshot] = '\0';
+
+ cb.cb_recursive = recursive;
+ cb.cb_snapname = snapname;
+ cb.cb_nvlp = &nvl;
+
+ /*
+ * 1. collect holds data, set format options
+ */
+ ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit,
+ holds_callback, &cb);
+ if (ret != 0)
+ ++errors;
+ }
+
+ /*
+ * 2. print holds data
+ */
+ print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl);
+
+ if (nvlist_empty(nvl))
+ (void) printf(gettext("no datasets available\n"));
+
+ nvlist_free(nvl);
+
+ return (0 != errors);
+}
#define CHECK_SPINNER 30
#define SPINNER_TIME 3 /* seconds */
@@ -2676,19 +5151,18 @@ typedef struct get_all_cbdata {
static int
get_one_dataset(zfs_handle_t *zhp, void *data)
{
- static char spin[] = { '-', '\\', '|', '/' };
+ static char *spin[] = { "-", "\\", "|", "/" };
static int spinval = 0;
static int spincheck = 0;
static time_t last_spin_time = (time_t)0;
- get_all_cbdata_t *cbp = data;
+ get_all_cb_t *cbp = data;
zfs_type_t type = zfs_get_type(zhp);
if (cbp->cb_verbose) {
if (--spincheck < 0) {
time_t now = time(NULL);
if (last_spin_time + SPINNER_TIME < now) {
- (void) printf("\b%c", spin[spinval++ % 4]);
- (void) fflush(stdout);
+ update_progress(spin[spinval++ % 4]);
last_spin_time = now;
}
spincheck = CHECK_SPINNER;
@@ -2698,8 +5172,7 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
/*
* Interate over any nested datasets.
*/
- if (type == ZFS_TYPE_FILESYSTEM &&
- zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
+ if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
zfs_close(zhp);
return (1);
}
@@ -2707,83 +5180,32 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
/*
* Skip any datasets whose type does not match.
*/
- if ((type & cbp->cb_types) == 0) {
+ if ((type & ZFS_TYPE_FILESYSTEM) == 0) {
zfs_close(zhp);
return (0);
}
-
- if (cbp->cb_alloc == cbp->cb_used) {
- zfs_handle_t **handles;
-
- if (cbp->cb_alloc == 0)
- cbp->cb_alloc = 64;
- else
- cbp->cb_alloc *= 2;
-
- handles = safe_malloc(cbp->cb_alloc * sizeof (void *));
-
- if (cbp->cb_handles) {
- bcopy(cbp->cb_handles, handles,
- cbp->cb_used * sizeof (void *));
- free(cbp->cb_handles);
- }
-
- cbp->cb_handles = handles;
- }
-
- cbp->cb_handles[cbp->cb_used++] = zhp;
+ libzfs_add_handle(cbp, zhp);
+ assert(cbp->cb_used <= cbp->cb_alloc);
return (0);
}
static void
-get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count,
- boolean_t verbose)
+get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
{
- get_all_cbdata_t cb = { 0 };
- cb.cb_types = types;
+ get_all_cb_t cb = { 0 };
cb.cb_verbose = verbose;
+ cb.cb_getone = get_one_dataset;
- if (verbose) {
- (void) printf("%s: *", gettext("Reading ZFS config"));
- (void) fflush(stdout);
- }
-
+ if (verbose)
+ set_progress_header(gettext("Reading ZFS config"));
(void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
*dslist = cb.cb_handles;
*count = cb.cb_used;
- if (verbose) {
- (void) printf("\b%s\n", gettext("done."));
- }
-}
-
-static int
-dataset_cmp(const void *a, const void *b)
-{
- zfs_handle_t **za = (zfs_handle_t **)a;
- zfs_handle_t **zb = (zfs_handle_t **)b;
- char mounta[MAXPATHLEN];
- char mountb[MAXPATHLEN];
- boolean_t gota, gotb;
-
- if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
- sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
- if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
- sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
-
- if (gota && gotb)
- return (strcmp(mounta, mountb));
-
- if (gota)
- return (-1);
- if (gotb)
- return (1);
-
- return (strcmp(zfs_get_name(a), zfs_get_name(b)));
+ if (verbose)
+ finish_progress(gettext("done."));
}
/*
@@ -2807,216 +5229,179 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
const char *cmdname = op == OP_SHARE ? "share" : "mount";
struct mnttab mnt;
uint64_t zoned, canmount;
- zfs_type_t type = zfs_get_type(zhp);
boolean_t shared_nfs, shared_smb;
- assert(type & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME));
+ assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM);
- if (type == ZFS_TYPE_FILESYSTEM) {
- /*
- * Check to make sure we can mount/share this dataset. If we
- * are in the global zone and the filesystem is exported to a
- * local zone, or if we are in a local zone and the
- * filesystem is not exported, then it is an error.
- */
- zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+ /*
+ * Check to make sure we can mount/share this dataset. If we
+ * are in the global zone and the filesystem is exported to a
+ * local zone, or if we are in a local zone and the
+ * filesystem is not exported, then it is an error.
+ */
+ zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
- if (zoned && getzoneid() == GLOBAL_ZONEID) {
- if (!explicit)
- return (0);
+ if (zoned && getzoneid() == GLOBAL_ZONEID) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "dataset is exported to a local zone\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "dataset is exported to a local zone\n"), cmdname,
+ zfs_get_name(zhp));
+ return (1);
- } else if (!zoned && getzoneid() != GLOBAL_ZONEID) {
- if (!explicit)
- return (0);
+ } else if (!zoned && getzoneid() != GLOBAL_ZONEID) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "permission denied\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "permission denied\n"), cmdname,
+ zfs_get_name(zhp));
+ return (1);
+ }
- /*
- * Ignore any filesystems which don't apply to us. This
- * includes those with a legacy mountpoint, or those with
- * legacy share options.
- */
- verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts,
- sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0);
-
- if (op == OP_SHARE && strcmp(shareopts, "off") == 0 &&
- strcmp(smbshareopts, "off") == 0) {
- if (!explicit)
- return (0);
+ /*
+ * Ignore any filesystems which don't apply to us. This
+ * includes those with a legacy mountpoint, or those with
+ * legacy share options.
+ */
+ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
+ sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
+ sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts,
+ sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0);
+
+ if (op == OP_SHARE && strcmp(shareopts, "off") == 0 &&
+ strcmp(smbshareopts, "off") == 0) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot share '%s': "
- "legacy share\n"), zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use share(1M) to "
- "share this filesystem, or set "
- "sharenfs property on\n"));
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot share '%s': "
+ "legacy share\n"), zfs_get_name(zhp));
+ (void) fprintf(stderr, gettext("use share(1M) to "
+ "share this filesystem, or set "
+ "sharenfs property on\n"));
+ return (1);
+ }
- /*
- * We cannot share or mount legacy filesystems. If the
- * shareopts is non-legacy but the mountpoint is legacy, we
- * treat it as a legacy share.
- */
- if (strcmp(mountpoint, "legacy") == 0) {
- if (!explicit)
- return (0);
+ /*
+ * We cannot share or mount legacy filesystems. If the
+ * shareopts is non-legacy but the mountpoint is legacy, we
+ * treat it as a legacy share.
+ */
+ if (strcmp(mountpoint, "legacy") == 0) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use %s(1M) to "
- "%s this filesystem\n"), cmdname, cmdname);
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
+ (void) fprintf(stderr, gettext("use %s(1M) to "
+ "%s this filesystem\n"), cmdname, cmdname);
+ return (1);
+ }
- if (strcmp(mountpoint, "none") == 0) {
- if (!explicit)
- return (0);
+ if (strcmp(mountpoint, "none") == 0) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': no "
- "mountpoint set\n"), cmdname, zfs_get_name(zhp));
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot %s '%s': no "
+ "mountpoint set\n"), cmdname, zfs_get_name(zhp));
+ return (1);
+ }
- /*
- * canmount explicit outcome
- * on no pass through
- * on yes pass through
- * off no return 0
- * off yes display error, return 1
- * noauto no return 0
- * noauto yes pass through
- */
- canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
- if (canmount == ZFS_CANMOUNT_OFF) {
+ /*
+ * canmount explicit outcome
+ * on no pass through
+ * on yes pass through
+ * off no return 0
+ * off yes display error, return 1
+ * noauto no return 0
+ * noauto yes pass through
+ */
+ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
+ if (canmount == ZFS_CANMOUNT_OFF) {
+ if (!explicit)
+ return (0);
+
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "'canmount' property is set to 'off'\n"), cmdname,
+ zfs_get_name(zhp));
+ return (1);
+ } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) {
+ return (0);
+ }
+
+ /*
+ * At this point, we have verified that the mountpoint and/or
+ * shareopts are appropriate for auto management. If the
+ * filesystem is already mounted or shared, return (failing
+ * for explicit requests); otherwise mount or share the
+ * filesystem.
+ */
+ switch (op) {
+ case OP_SHARE:
+
+ shared_nfs = zfs_is_shared_nfs(zhp, NULL);
+ shared_smb = zfs_is_shared_smb(zhp, NULL);
+
+ if (shared_nfs && shared_smb ||
+ (shared_nfs && strcmp(shareopts, "on") == 0 &&
+ strcmp(smbshareopts, "off") == 0) ||
+ (shared_smb && strcmp(smbshareopts, "on") == 0 &&
+ strcmp(shareopts, "off") == 0)) {
if (!explicit)
return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "'canmount' property is set to 'off'\n"), cmdname,
+ (void) fprintf(stderr, gettext("cannot share "
+ "'%s': filesystem already shared\n"),
zfs_get_name(zhp));
return (1);
- } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) {
- return (0);
}
- /*
- * At this point, we have verified that the mountpoint and/or
- * shareopts are appropriate for auto management. If the
- * filesystem is already mounted or shared, return (failing
- * for explicit requests); otherwise mount or share the
- * filesystem.
- */
- switch (op) {
- case OP_SHARE:
-
- shared_nfs = zfs_is_shared_nfs(zhp, NULL);
- shared_smb = zfs_is_shared_smb(zhp, NULL);
-
- if (shared_nfs && shared_smb ||
- (shared_nfs && strcmp(shareopts, "on") == 0 &&
- strcmp(smbshareopts, "off") == 0) ||
- (shared_smb && strcmp(smbshareopts, "on") == 0 &&
- strcmp(shareopts, "off") == 0)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': filesystem already shared\n"),
- zfs_get_name(zhp));
- return (1);
- }
+ if (!zfs_is_mounted(zhp, NULL) &&
+ zfs_mount(zhp, NULL, 0) != 0)
+ return (1);
- if (!zfs_is_mounted(zhp, NULL) &&
- zfs_mount(zhp, NULL, 0) != 0)
+ if (protocol == NULL) {
+ if (zfs_shareall(zhp) != 0)
return (1);
-
- if (protocol == NULL) {
- if (zfs_shareall(zhp) != 0)
- return (1);
- } else if (strcmp(protocol, "nfs") == 0) {
- if (zfs_share_nfs(zhp))
- return (1);
- } else if (strcmp(protocol, "smb") == 0) {
- if (zfs_share_smb(zhp))
- return (1);
- } else {
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': invalid share type '%s' "
- "specified\n"),
- zfs_get_name(zhp), protocol);
+ } else if (strcmp(protocol, "nfs") == 0) {
+ if (zfs_share_nfs(zhp))
return (1);
- }
-
- break;
-
- case OP_MOUNT:
- if (options == NULL)
- mnt.mnt_mntopts = "";
- else
- mnt.mnt_mntopts = (char *)options;
-
- if (!hasmntopt(&mnt, MNTOPT_REMOUNT) &&
- zfs_is_mounted(zhp, NULL)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot mount "
- "'%s': filesystem already mounted\n"),
- zfs_get_name(zhp));
- return (1);
- }
-
- if (zfs_mount(zhp, options, flags) != 0)
+ } else if (strcmp(protocol, "smb") == 0) {
+ if (zfs_share_smb(zhp))
return (1);
- break;
+ } else {
+ (void) fprintf(stderr, gettext("cannot share "
+ "'%s': invalid share type '%s' "
+ "specified\n"),
+ zfs_get_name(zhp), protocol);
+ return (1);
}
- } else {
- assert(op == OP_SHARE);
- /*
- * Ignore any volumes that aren't shared.
- */
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+ break;
- if (strcmp(shareopts, "off") == 0) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share '%s': "
- "'shareiscsi' property not set\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("set 'shareiscsi' "
- "property or use iscsitadm(1M) to share this "
- "volume\n"));
- return (1);
- }
+ case OP_MOUNT:
+ if (options == NULL)
+ mnt.mnt_mntopts = "";
+ else
+ mnt.mnt_mntopts = (char *)options;
- if (zfs_is_shared_iscsi(zhp)) {
+ if (!hasmntopt(&mnt, MNTOPT_REMOUNT) &&
+ zfs_is_mounted(zhp, NULL)) {
if (!explicit)
return (0);
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': volume already shared\n"),
+ (void) fprintf(stderr, gettext("cannot mount "
+ "'%s': filesystem already mounted\n"),
zfs_get_name(zhp));
return (1);
}
- if (zfs_share_iscsi(zhp) != 0)
+ if (zfs_mount(zhp, options, flags) != 0)
return (1);
+ break;
}
return (0);
@@ -3028,19 +5413,16 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
static void
report_mount_progress(int current, int total)
{
- static int len;
- static char *reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
- "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
- static time_t last_progress_time;
+ static time_t last_progress_time = 0;
time_t now = time(NULL);
+ char info[32];
/* report 1..n instead of 0..n-1 */
++current;
/* display header if we're here for the first time */
if (current == 1) {
- (void) printf(gettext("Mounting ZFS filesystems: "));
- len = 0;
+ set_progress_header(gettext("Mounting ZFS filesystems"));
} else if (current != total && last_progress_time + MOUNT_TIME >= now) {
/* too soon to report again */
return;
@@ -3048,13 +5430,12 @@ report_mount_progress(int current, int total)
last_progress_time = now;
- /* back up to prepare for overwriting */
- if (len)
- (void) printf("%*.*s", len, len, reverse);
+ (void) sprintf(info, "(%d/%d)", current, total);
- /* We put a newline at the end if this is the last one. */
- len = printf("(%d/%d)%s", current, total, current == total ? "\n" : "");
- (void) fflush(stdout);
+ if (current == total)
+ finish_progress(info);
+ else
+ update_progress(info);
}
static void
@@ -3083,7 +5464,7 @@ share_mount(int op, int argc, char **argv)
boolean_t verbose = B_FALSE;
int c, ret = 0;
char *options = NULL;
- int types, flags = 0;
+ int flags = 0;
/* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a"))
@@ -3133,24 +5514,16 @@ share_mount(int op, int argc, char **argv)
size_t i, count = 0;
char *protocol = NULL;
- if (op == OP_MOUNT) {
- types = ZFS_TYPE_FILESYSTEM;
- } else if (argc > 0) {
- if (strcmp(argv[0], "nfs") == 0 ||
- strcmp(argv[0], "smb") == 0) {
- types = ZFS_TYPE_FILESYSTEM;
- } else if (strcmp(argv[0], "iscsi") == 0) {
- types = ZFS_TYPE_VOLUME;
- } else {
+ if (op == OP_SHARE && argc > 0) {
+ if (strcmp(argv[0], "nfs") != 0 &&
+ strcmp(argv[0], "smb") != 0) {
(void) fprintf(stderr, gettext("share type "
- "must be 'nfs', 'smb' or 'iscsi'\n"));
+ "must be 'nfs' or 'smb'\n"));
usage(B_FALSE);
}
protocol = argv[0];
argc--;
argv++;
- } else {
- types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
}
if (argc != 0) {
@@ -3158,12 +5531,13 @@ share_mount(int op, int argc, char **argv)
usage(B_FALSE);
}
- get_all_datasets(types, &dslist, &count, verbose);
+ start_progress_timer();
+ get_all_datasets(&dslist, &count, verbose);
if (count == 0)
return (0);
- qsort(dslist, count, sizeof (void *), dataset_cmp);
+ qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp);
for (i = 0; i < count; i++) {
if (verbose)
@@ -3177,8 +5551,7 @@ share_mount(int op, int argc, char **argv)
free(dslist);
} else if (argc == 0) {
- struct statfs *sfs;
- int i, n;
+ struct mnttab entry;
if ((op == OP_SHARE) || (options != NULL)) {
(void) fprintf(stderr, gettext("missing filesystem "
@@ -3191,33 +5564,27 @@ share_mount(int op, int argc, char **argv)
* display any active ZFS mounts. We hide any snapshots, since
* they are controlled automatically.
*/
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- fprintf(stderr, "getmntinfo(): %s\n", strerror(errno));
- return (0);
- }
- for (i = 0; i < n; i++) {
- if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0 ||
- strchr(sfs[i].f_mntfromname, '@') != NULL)
+ rewind(mnttab_file);
+ while (getmntent(mnttab_file, &entry) == 0) {
+ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 ||
+ strchr(entry.mnt_special, '@') != NULL)
continue;
- (void) printf("%-30s %s\n", sfs[i].f_mntfromname,
- sfs[i].f_mntonname);
+ (void) printf("%-30s %s\n", entry.mnt_special,
+ entry.mnt_mountp);
}
} else {
zfs_handle_t *zhp;
- types = ZFS_TYPE_FILESYSTEM;
- if (op == OP_SHARE)
- types |= ZFS_TYPE_VOLUME;
-
if (argc > 1) {
(void) fprintf(stderr,
gettext("too many arguments\n"));
usage(B_FALSE);
}
- if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) {
+ if ((zhp = zfs_open(g_zfs, argv[0],
+ ZFS_TYPE_FILESYSTEM)) == NULL) {
ret = 1;
} else {
ret = share_mount_one(zhp, op, flags, NULL, B_TRUE,
@@ -3230,7 +5597,7 @@ share_mount(int op, int argc, char **argv)
}
/*
- * zfs mount -a [nfs | iscsi]
+ * zfs mount -a [nfs]
* zfs mount filesystem
*
* Mount all filesystems, or mount the given filesystem.
@@ -3242,7 +5609,7 @@ zfs_do_mount(int argc, char **argv)
}
/*
- * zfs share -a [nfs | iscsi | smb]
+ * zfs share -a [nfs | smb]
* zfs share filesystem
*
* Share all filesystems, or share the given filesystem.
@@ -3280,7 +5647,7 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
zfs_handle_t *zhp;
int ret;
struct stat64 statbuf;
- struct mnttab search = { 0 }, entry;
+ struct extmnttab entry;
const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount";
ino_t path_inode;
@@ -3300,9 +5667,26 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
/*
* Search for the given (major,minor) pair in the mount table.
*/
- search.mnt_mountp = path;
+#ifdef sun
rewind(mnttab_file);
- if (getmntany(mnttab_file, &entry, &search) != 0) {
+ while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) {
+ if (entry.mnt_major == major(statbuf.st_dev) &&
+ entry.mnt_minor == minor(statbuf.st_dev))
+ break;
+ }
+#else
+ {
+ struct statfs sfs;
+
+ if (statfs(path, &sfs) != 0) {
+ (void) fprintf(stderr, "%s: %s\n", path,
+ strerror(errno));
+ ret = -1;
+ }
+ statfs2mnttab(&sfs, &entry);
+ }
+#endif
+ if (ret != 0) {
if (op == OP_SHARE) {
(void) fprintf(stderr, gettext("cannot %s '%s': not "
"currently mounted\n"), cmdname, path);
@@ -3392,9 +5776,9 @@ unshare_unmount(int op, int argc, char **argv)
int do_all = 0;
int flags = 0;
int ret = 0;
- int types, c;
+ int c;
zfs_handle_t *zhp;
- char nfsiscsi_mnt_prop[ZFS_MAXPROPLEN];
+ char nfs_mnt_prop[ZFS_MAXPROPLEN];
char sharesmb[ZFS_MAXPROPLEN];
/* check options */
@@ -3431,51 +5815,37 @@ unshare_unmount(int op, int argc, char **argv)
* the special type (dataset name), and walk the result in
* reverse to make sure to get any snapshots first.
*/
+ struct mnttab entry;
uu_avl_pool_t *pool;
uu_avl_t *tree;
unshare_unmount_node_t *node;
uu_avl_index_t idx;
uu_avl_walk_t *walk;
- struct statfs *sfs;
- int i, n;
if (argc != 0) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
- if ((pool = uu_avl_pool_create("unmount_pool",
+ if (((pool = uu_avl_pool_create("unmount_pool",
sizeof (unshare_unmount_node_t),
offsetof(unshare_unmount_node_t, un_avlnode),
- unshare_unmount_compare,
- UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- exit(1);
- }
-
- if ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- exit(1);
- }
+ unshare_unmount_compare, UU_DEFAULT)) == NULL) ||
+ ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL))
+ nomem();
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "getmntinfo() failed\n"));
- exit(1);
- }
- for (i = 0; i < n; i++) {
+ rewind(mnttab_file);
+ while (getmntent(mnttab_file, &entry) == 0) {
/* ignore non-ZFS entries */
- if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0)
+ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
continue;
/* ignore snapshots */
- if (strchr(sfs[i].f_mntfromname, '@') != NULL)
+ if (strchr(entry.mnt_special, '@') != NULL)
continue;
- if ((zhp = zfs_open(g_zfs, sfs[i].f_mntfromname,
+ if ((zhp = zfs_open(g_zfs, entry.mnt_special,
ZFS_TYPE_FILESYSTEM)) == NULL) {
ret = 1;
continue;
@@ -3484,25 +5854,25 @@ unshare_unmount(int op, int argc, char **argv)
switch (op) {
case OP_SHARE:
verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "off") != 0)
+ if (strcmp(nfs_mnt_prop, "off") != 0)
break;
verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "off") == 0)
+ if (strcmp(nfs_mnt_prop, "off") == 0)
continue;
break;
case OP_MOUNT:
/* Ignore legacy mounts */
verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0)
+ if (strcmp(nfs_mnt_prop, "legacy") == 0)
continue;
/* Ignore canmount=noauto mounts */
if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) ==
@@ -3514,13 +5884,7 @@ unshare_unmount(int op, int argc, char **argv)
node = safe_malloc(sizeof (unshare_unmount_node_t));
node->un_zhp = zhp;
-
- if ((node->un_mountp = strdup(sfs[i].f_mntonname)) ==
- NULL) {
- (void) fprintf(stderr, gettext("internal error:"
- " out of memory\n"));
- exit(1);
- }
+ node->un_mountp = safe_strdup(entry.mnt_mountp);
uu_avl_node_init(node, &node->un_avlnode, pool);
@@ -3538,11 +5902,8 @@ unshare_unmount(int op, int argc, char **argv)
* removing it from the AVL tree in the process.
*/
if ((walk = uu_avl_walk_start(tree,
- UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory"));
- exit(1);
- }
+ UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL)
+ nomem();
while ((node = uu_avl_walk_next(walk)) != NULL) {
uu_avl_remove(tree, node);
@@ -3570,29 +5931,6 @@ unshare_unmount(int op, int argc, char **argv)
uu_avl_destroy(tree);
uu_avl_pool_destroy(pool);
- if (op == OP_SHARE) {
- /*
- * Finally, unshare any volumes shared via iSCSI.
- */
- zfs_handle_t **dslist = NULL;
- size_t i, count = 0;
-
- get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count,
- B_FALSE);
-
- if (count != 0) {
- qsort(dslist, count, sizeof (void *),
- dataset_cmp);
-
- for (i = 0; i < count; i++) {
- if (zfs_unshare_iscsi(dslist[i]) != 0)
- ret = 1;
- zfs_close(dslist[i]);
- }
-
- free(dslist);
- }
- }
} else {
if (argc != 1) {
if (argc == 0)
@@ -3614,91 +5952,63 @@ unshare_unmount(int op, int argc, char **argv)
return (unshare_unmount_path(op, argv[0],
flags, B_FALSE));
- types = ZFS_TYPE_FILESYSTEM;
- if (op == OP_SHARE)
- types |= ZFS_TYPE_VOLUME;
-
- if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0],
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (1);
- if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
- nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop), NULL,
- NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, op == OP_SHARE ?
+ ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
+ nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL,
+ NULL, 0, B_FALSE) == 0);
- switch (op) {
- case OP_SHARE:
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
- NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
- sharesmb, sizeof (sharesmb), NULL, NULL,
- 0, B_FALSE) == 0);
-
- if (strcmp(nfsiscsi_mnt_prop, "off") == 0 &&
- strcmp(sharesmb, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': legacy share\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use "
- "unshare(1M) to unshare this "
- "filesystem\n"));
- ret = 1;
- } else if (!zfs_is_shared(zhp)) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': not currently "
- "shared\n"), zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unshareall(zhp) != 0) {
- ret = 1;
- }
- break;
-
- case OP_MOUNT:
- if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0) {
- (void) fprintf(stderr, gettext("cannot "
- "unmount '%s': legacy "
- "mountpoint\n"), zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use "
- "umount(1M) to unmount this "
- "filesystem\n"));
- ret = 1;
- } else if (!zfs_is_mounted(zhp, NULL)) {
- (void) fprintf(stderr, gettext("cannot "
- "unmount '%s': not currently "
- "mounted\n"),
- zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unmountall(zhp, flags) != 0) {
- ret = 1;
- }
- break;
- }
- } else {
- assert(op == OP_SHARE);
-
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI,
- nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop),
+ switch (op) {
+ case OP_SHARE:
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
+ sharesmb, sizeof (sharesmb), NULL, NULL,
+ 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot unshare "
- "'%s': 'shareiscsi' property not set\n"),
+ if (strcmp(nfs_mnt_prop, "off") == 0 &&
+ strcmp(sharesmb, "off") == 0) {
+ (void) fprintf(stderr, gettext("cannot "
+ "unshare '%s': legacy share\n"),
zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("set "
- "'shareiscsi' property or use "
- "iscsitadm(1M) to share this volume\n"));
+ (void) fprintf(stderr, gettext("use "
+ "unshare(1M) to unshare this "
+ "filesystem\n"));
ret = 1;
- } else if (!zfs_is_shared_iscsi(zhp)) {
+ } else if (!zfs_is_shared(zhp)) {
(void) fprintf(stderr, gettext("cannot "
- "unshare '%s': not currently shared\n"),
+ "unshare '%s': not currently "
+ "shared\n"), zfs_get_name(zhp));
+ ret = 1;
+ } else if (zfs_unshareall(zhp) != 0) {
+ ret = 1;
+ }
+ break;
+
+ case OP_MOUNT:
+ if (strcmp(nfs_mnt_prop, "legacy") == 0) {
+ (void) fprintf(stderr, gettext("cannot "
+ "unmount '%s': legacy "
+ "mountpoint\n"), zfs_get_name(zhp));
+ (void) fprintf(stderr, gettext("use "
+ "umount(1M) to unmount this "
+ "filesystem\n"));
+ ret = 1;
+ } else if (!zfs_is_mounted(zhp, NULL)) {
+ (void) fprintf(stderr, gettext("cannot "
+ "unmount '%s': not currently "
+ "mounted\n"),
zfs_get_name(zhp));
ret = 1;
- } else if (zfs_unshare_iscsi(zhp) != 0) {
+ } else if (zfs_unmountall(zhp, flags) != 0) {
ret = 1;
}
+ break;
}
zfs_close(zhp);
@@ -3793,16 +6103,6 @@ zfs_do_unjail(int argc, char **argv)
return (do_jail(argc, argv, 0));
}
-/* ARGSUSED */
-static int
-zfs_do_python(int argc, char **argv)
-{
- (void) execv(pypath, argv-1);
- (void) fprintf(stderr, "internal error: %s not found\n", pypath);
- (void) fprintf(stderr, " install sysutils/py-zfs port to correct this\n");
- return (-1);
-}
-
/*
* Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is
* 'legacy'. Otherwise, complain that use should be using 'zfs mount'.
@@ -3825,14 +6125,10 @@ manual_mount(int argc, char **argv)
(void) strlcpy(mntopts, optarg, sizeof (mntopts));
break;
case 'O':
-#if 0 /* FreeBSD: No support for MS_OVERLAY. */
flags |= MS_OVERLAY;
-#endif
break;
case 'm':
-#if 0 /* FreeBSD: No support for MS_NOMNTTAB. */
flags |= MS_NOMNTTAB;
-#endif
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -3943,27 +6239,6 @@ manual_unmount(int argc, char **argv)
}
static int
-volcheck(zpool_handle_t *zhp, void *data)
-{
- boolean_t isinit = *((boolean_t *)data);
-
- if (isinit)
- return (zpool_create_zvol_links(zhp));
- else
- return (zpool_remove_zvol_links(zhp));
-}
-
-/*
- * Iterate over all pools in the system and either create or destroy /dev/zvol
- * links, depending on the value of 'isinit'.
- */
-static int
-do_volcheck(boolean_t isinit)
-{
- return (zpool_iter(g_zfs, volcheck, &isinit) ? 1 : 0);
-}
-
-static int
find_command_idx(char *command, int *idx)
{
int i;
@@ -3980,6 +6255,81 @@ find_command_idx(char *command, int *idx)
return (1);
}
+static int
+zfs_do_diff(int argc, char **argv)
+{
+ zfs_handle_t *zhp;
+ int flags = 0;
+ char *tosnap = NULL;
+ char *fromsnap = NULL;
+ char *atp, *copy;
+ int err;
+ int c;
+
+ while ((c = getopt(argc, argv, "FHt")) != -1) {
+ switch (c) {
+ case 'F':
+ flags |= ZFS_DIFF_CLASSIFY;
+ break;
+ case 'H':
+ flags |= ZFS_DIFF_PARSEABLE;
+ break;
+ case 't':
+ flags |= ZFS_DIFF_TIMESTAMP;
+ break;
+ default:
+ (void) fprintf(stderr,
+ gettext("invalid option '%c'\n"), optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ (void) fprintf(stderr,
+ gettext("must provide at least one snapshot name\n"));
+ usage(B_FALSE);
+ }
+
+ if (argc > 2) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
+
+ fromsnap = argv[0];
+ tosnap = (argc == 2) ? argv[1] : NULL;
+
+ copy = NULL;
+ if (*fromsnap != '@')
+ copy = strdup(fromsnap);
+ else if (tosnap)
+ copy = strdup(tosnap);
+ if (copy == NULL)
+ usage(B_FALSE);
+
+ if (atp = strchr(copy, '@'))
+ *atp = '\0';
+
+ if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL)
+ return (1);
+
+ free(copy);
+
+ /*
+ * Ignore SIGPIPE so that the library can give us
+ * information on any failure
+ */
+ (void) sigignore(SIGPIPE);
+
+ err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags);
+
+ zfs_close(zhp);
+
+ return (err != 0);
+}
+
int
main(int argc, char **argv)
{
@@ -4049,15 +6399,6 @@ main(int argc, char **argv)
usage(B_TRUE);
/*
- * 'volinit' and 'volfini' do not appear in the usage message,
- * so we have to special case them here.
- */
- if (strcmp(cmdname, "volinit") == 0)
- return (do_volcheck(B_TRUE));
- else if (strcmp(cmdname, "volfini") == 0)
- return (do_volcheck(B_FALSE));
-
- /*
* Run the appropriate command.
*/
libzfs_mnttab_cache(g_zfs, B_TRUE);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
index c7f2f1618647..3ddff9e22d7d 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZFS_UTIL_H
#define _ZFS_UTIL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <libzfs.h>
#ifdef __cplusplus
@@ -35,6 +32,7 @@ extern "C" {
#endif
void * safe_malloc(size_t size);
+void nomem(void);
libzfs_handle_t *g_zfs;
#ifdef __cplusplus
diff --git a/cddl/contrib/opensolaris/cmd/zinject/translate.c b/cddl/contrib/opensolaris/cmd/zinject/translate.c
index da26cd633302..442f220c442a 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/translate.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/translate.c
@@ -19,14 +19,11 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <libzfs.h>
-#undef verify /* both libzfs.h and zfs_context.h want to define this */
-
#include <sys/zfs_context.h>
#include <errno.h>
@@ -49,9 +46,6 @@
#include "zinject.h"
-#include <assert.h>
-#define verify assert
-
extern void kernel_init(int);
extern void kernel_fini(void);
@@ -70,6 +64,18 @@ ziprintf(const char *fmt, ...)
va_end(ap);
}
+static void
+compress_slashes(const char *src, char *dest)
+{
+ while (*src != '\0') {
+ *dest = *src++;
+ while (*dest == '/' && *src == '/')
+ ++src;
+ ++dest;
+ }
+ *dest = '\0';
+}
+
/*
* Given a full path to a file, translate into a dataset name and a relative
* path within the dataset. 'dataset' must be at least MAXNAMELEN characters,
@@ -77,11 +83,14 @@ ziprintf(const char *fmt, ...)
* buffer, which we need later to get the object ID.
*/
static int
-parse_pathname(const char *fullpath, char *dataset, char *relpath,
+parse_pathname(const char *inpath, char *dataset, char *relpath,
struct stat64 *statbuf)
{
struct statfs sfs;
const char *rel;
+ char fullpath[MAXPATHLEN];
+
+ compress_slashes(inpath, fullpath);
if (fullpath[0] != '/') {
(void) fprintf(stderr, "invalid object '%s': must be full "
@@ -148,8 +157,8 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
*/
sync();
- if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
- DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+ err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
+ if (err != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
return (-1);
@@ -158,7 +167,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
record->zi_objset = dmu_objset_id(os);
record->zi_object = statbuf->st_ino;
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
return (0);
}
@@ -233,17 +242,17 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
* Get the dnode associated with object, so we can calculate the block
* size.
*/
- if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+ if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
+ B_TRUE, FTAG, &os)) != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
goto out;
}
if (record->zi_object == 0) {
- dn = os->os->os_meta_dnode;
+ dn = DMU_META_DNODE(os);
} else {
- err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+ err = dnode_hold(os, record->zi_object, FTAG, &dn);
if (err != 0) {
(void) fprintf(stderr, "failed to hold dnode "
"for object %llu\n",
@@ -292,11 +301,11 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
ret = 0;
out:
if (dn) {
- if (dn != os->os->os_meta_dnode)
+ if (dn != DMU_META_DNODE(os))
dnode_rele(dn, FTAG);
}
if (os)
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
return (ret);
}
@@ -333,8 +342,8 @@ translate_record(err_type_t type, const char *object, const char *range,
case TYPE_CONFIG:
record->zi_type = DMU_OT_PACKED_NVLIST;
break;
- case TYPE_BPLIST:
- record->zi_type = DMU_OT_BPLIST;
+ case TYPE_BPOBJ:
+ record->zi_type = DMU_OT_BPOBJ;
break;
case TYPE_SPACEMAP:
record->zi_type = DMU_OT_SPACE_MAP;
@@ -455,6 +464,14 @@ translate_device(const char *pool, const char *device, err_type_t label_type,
record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
break;
+ case TYPE_LABEL_PAD1:
+ record->zi_start = offsetof(vdev_label_t, vl_pad1);
+ record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
+ break;
+ case TYPE_LABEL_PAD2:
+ record->zi_start = offsetof(vdev_label_t, vl_pad2);
+ record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
+ break;
}
return (0);
}
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.c b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
index e8327e8dcdf5..51d2fc97ccd0 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -42,12 +41,12 @@
* any attempt to read from the device will return EIO, but any attempt to
* reopen the device will also return ENXIO.
* For label faults, the -L option must be specified. This allows faults
- * to be injected into either the nvlist or uberblock region of all the labels
- * for the specified device.
+ * to be injected into either the nvlist, uberblock, pad1, or pad2 region
+ * of all the labels for the specified device.
*
* This form of the command looks like:
*
- * zinject -d device [-e errno] [-L <uber | nvlist>] pool
+ * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
*
*
* DATA FAULTS
@@ -70,7 +69,7 @@
* mos Any data in the MOS
* mosdir object directory
* config pool configuration
- * bplist blkptr list
+ * bpobj blkptr list
* spacemap spacemap
* metaslab metaslab
* errlog persistent error log
@@ -167,11 +166,13 @@ static const char *errtable[TYPE_INVAL] = {
"mosdir",
"metaslab",
"config",
- "bplist",
+ "bpobj",
"spacemap",
"errlog",
"uber",
- "nvlist"
+ "nvlist",
+ "pad1",
+ "pad2"
};
static err_type_t
@@ -195,8 +196,8 @@ type_to_name(uint64_t type)
return ("metaslab");
case DMU_OT_PACKED_NVLIST:
return ("config");
- case DMU_OT_BPLIST:
- return ("bplist");
+ case DMU_OT_BPOBJ:
+ return ("bpobj");
case DMU_OT_SPACE_MAP:
return ("spacemap");
case DMU_OT_ERROR_LOG:
@@ -225,10 +226,27 @@ usage(void)
"\t\tClear the particular record (if given a numeric ID), or\n"
"\t\tall records if 'all' is specificed.\n"
"\n"
- "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
+ "\tzinject -p <function name> pool\n"
+ "\t\tInject a panic fault at the specified function. Only \n"
+ "\t\tfunctions which call spa_vdev_config_exit(), or \n"
+ "\t\tspa_vdev_exit() will trigger a panic.\n"
+ "\n"
+ "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
+ "\t [-T <read|write|free|claim|all> pool\n"
"\t\tInject a fault into a particular device or the device's\n"
- "\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
- "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
+ "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
+ "\t\t'pad1', or 'pad2'.\n"
+ "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
+ "\n"
+ "\tzinject -d device -A <degrade|fault> pool\n"
+ "\t\tPerform a specific action on a particular device\n"
+ "\n"
+ "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
+ "\t\tCause the pool to stop writing blocks yet not\n"
+ "\t\treport errors for a duration. Simulates buggy hardware\n"
+ "\t\tthat fails to honor cache flush requests.\n"
+ "\t\tDefault duration is 30 seconds. The machine is panicked\n"
+ "\t\tat the end of the duration.\n"
"\n"
"\tzinject -b objset:object:level:blkid pool\n"
"\n"
@@ -270,7 +288,7 @@ usage(void)
"\t\t\ton a ZFS filesystem.\n"
"\n"
"\t-t <mos>\tInject errors into the MOS for objects of the given\n"
- "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n"
+ "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
"\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
"\t\t\tthe poolname.\n");
}
@@ -289,6 +307,12 @@ iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
&zc.zc_inject_record, data)) != 0)
return (ret);
+ if (errno != ENOENT) {
+ (void) fprintf(stderr, "Unable to list handlers: %s\n",
+ strerror(errno));
+ return (-1);
+ }
+
return (0);
}
@@ -298,7 +322,7 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
{
int *count = data;
- if (record->zi_guid != 0)
+ if (record->zi_guid != 0 || record->zi_func[0] != '\0')
return (0);
if (*count == 0) {
@@ -330,7 +354,7 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
{
int *count = data;
- if (record->zi_guid == 0)
+ if (record->zi_guid == 0 || record->zi_func[0] != '\0')
return (0);
if (*count == 0) {
@@ -346,6 +370,27 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
return (0);
}
+static int
+print_panic_handler(int id, const char *pool, zinject_record_t *record,
+ void *data)
+{
+ int *count = data;
+
+ if (record->zi_func[0] == '\0')
+ return (0);
+
+ if (*count == 0) {
+ (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
+ (void) printf("--- --------------- ----------------\n");
+ }
+
+ *count += 1;
+
+ (void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
+
+ return (0);
+}
+
/*
* Print all registered error handlers. Returns the number of handlers
* registered.
@@ -353,14 +398,25 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
static int
print_all_handlers(void)
{
- int count = 0;
+ int count = 0, total = 0;
(void) iter_handlers(print_device_handler, &count);
- (void) printf("\n");
- count = 0;
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
(void) iter_handlers(print_data_handler, &count);
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
+ (void) iter_handlers(print_panic_handler, &count);
- return (count);
+ return (count + total);
}
/* ARGSUSED */
@@ -389,7 +445,8 @@ cancel_all_handlers(void)
{
int ret = iter_handlers(cancel_one_handler, NULL);
- (void) printf("removed all registered handlers\n");
+ if (ret == 0)
+ (void) printf("removed all registered handlers\n");
return (ret);
}
@@ -446,6 +503,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
if (record->zi_guid) {
(void) printf(" vdev: %llx\n",
(u_longlong_t)record->zi_guid);
+ } else if (record->zi_func[0] != '\0') {
+ (void) printf(" panic function: %s\n",
+ record->zi_func);
+ } else if (record->zi_duration > 0) {
+ (void) printf(" time: %lld seconds\n",
+ (u_longlong_t)record->zi_duration);
+ } else if (record->zi_duration < 0) {
+ (void) printf(" txgs: %lld \n",
+ (u_longlong_t)-record->zi_duration);
} else {
(void) printf("objset: %llu\n",
(u_longlong_t)record->zi_objset);
@@ -468,6 +534,22 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
}
int
+perform_action(const char *pool, zinject_record_t *record, int cmd)
+{
+ zfs_cmd_t zc;
+
+ ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
+ (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+ zc.zc_guid = record->zi_guid;
+ zc.zc_cookie = cmd;
+
+ if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ return (0);
+
+ return (1);
+}
+
+int
main(int argc, char **argv)
{
int c;
@@ -480,12 +562,17 @@ main(int argc, char **argv)
int quiet = 0;
int error = 0;
int domount = 0;
+ int io_type = ZIO_TYPES;
+ int action = VDEV_STATE_UNKNOWN;
err_type_t type = TYPE_INVAL;
err_type_t label = TYPE_INVAL;
zinject_record_t record = { 0 };
char pool[MAXNAMELEN];
char dataset[MAXNAMELEN];
zfs_handle_t *zhp;
+ int nowrites = 0;
+ int dur_txg = 0;
+ int dur_secs = 0;
int ret;
int flags = 0;
@@ -517,11 +604,24 @@ main(int argc, char **argv)
return (0);
}
- while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
+ while ((c = getopt(argc, argv,
+ ":aA:b:d:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
switch (c) {
case 'a':
flags |= ZINJECT_FLUSH_ARC;
break;
+ case 'A':
+ if (strcasecmp(optarg, "degrade") == 0) {
+ action = VDEV_STATE_DEGRADED;
+ } else if (strcasecmp(optarg, "fault") == 0) {
+ action = VDEV_STATE_FAULTED;
+ } else {
+ (void) fprintf(stderr, "invalid action '%s': "
+ "must be 'degrade' or 'fault'\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
case 'b':
raw = optarg;
break;
@@ -538,6 +638,8 @@ main(int argc, char **argv)
error = ECKSUM;
} else if (strcasecmp(optarg, "nxio") == 0) {
error = ENXIO;
+ } else if (strcasecmp(optarg, "dtl") == 0) {
+ error = ECHILD;
} else {
(void) fprintf(stderr, "invalid error type "
"'%s': must be 'io', 'checksum' or "
@@ -557,9 +659,27 @@ main(int argc, char **argv)
case 'F':
record.zi_failfast = B_TRUE;
break;
+ case 'g':
+ dur_txg = 1;
+ record.zi_duration = (int)strtol(optarg, &end, 10);
+ if (record.zi_duration <= 0 || *end != '\0') {
+ (void) fprintf(stderr, "invalid duration '%s': "
+ "must be a positive integer\n", optarg);
+ usage();
+ return (1);
+ }
+ /* store duration of txgs as its negative */
+ record.zi_duration *= -1;
+ break;
case 'h':
usage();
return (0);
+ case 'I':
+ /* default duration, if one hasn't yet been defined */
+ nowrites = 1;
+ if (dur_secs == 0 && dur_txg == 0)
+ record.zi_duration = 30;
+ break;
case 'l':
level = (int)strtol(optarg, &end, 10);
if (*end != '\0') {
@@ -572,12 +692,45 @@ main(int argc, char **argv)
case 'm':
domount = 1;
break;
+ case 'p':
+ (void) strlcpy(record.zi_func, optarg,
+ sizeof (record.zi_func));
+ break;
case 'q':
quiet = 1;
break;
case 'r':
range = optarg;
break;
+ case 's':
+ dur_secs = 1;
+ record.zi_duration = (int)strtol(optarg, &end, 10);
+ if (record.zi_duration <= 0 || *end != '\0') {
+ (void) fprintf(stderr, "invalid duration '%s': "
+ "must be a positive integer\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
+ case 'T':
+ if (strcasecmp(optarg, "read") == 0) {
+ io_type = ZIO_TYPE_READ;
+ } else if (strcasecmp(optarg, "write") == 0) {
+ io_type = ZIO_TYPE_WRITE;
+ } else if (strcasecmp(optarg, "free") == 0) {
+ io_type = ZIO_TYPE_FREE;
+ } else if (strcasecmp(optarg, "claim") == 0) {
+ io_type = ZIO_TYPE_CLAIM;
+ } else if (strcasecmp(optarg, "all") == 0) {
+ io_type = ZIO_TYPES;
+ } else {
+ (void) fprintf(stderr, "invalid I/O type "
+ "'%s': must be 'read', 'write', 'free', "
+ "'claim' or 'all'\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
case 't':
if ((type = name_to_type(optarg)) == TYPE_INVAL &&
!MOS_TYPE(type)) {
@@ -620,7 +773,8 @@ main(int argc, char **argv)
* '-c' is invalid with any other options.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
- level != 0) {
+ level != 0 || record.zi_func[0] != '\0' ||
+ record.zi_duration != 0) {
(void) fprintf(stderr, "cancel (-c) incompatible with "
"any other options\n");
usage();
@@ -652,7 +806,8 @@ main(int argc, char **argv)
* for doing injection, so handle it separately here.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
- level != 0) {
+ level != 0 || record.zi_func[0] != '\0' ||
+ record.zi_duration != 0) {
(void) fprintf(stderr, "device (-d) incompatible with "
"data error injection\n");
usage();
@@ -675,12 +830,18 @@ main(int argc, char **argv)
return (1);
}
+ record.zi_iotype = io_type;
if (translate_device(pool, device, label, &record) != 0)
return (1);
if (!error)
error = ENXIO;
+
+ if (action != VDEV_STATE_UNKNOWN)
+ return (perform_action(pool, &record, action));
+
} else if (raw != NULL) {
- if (range != NULL || type != TYPE_INVAL || level != 0) {
+ if (range != NULL || type != TYPE_INVAL || level != 0 ||
+ record.zi_func[0] != '\0' || record.zi_duration != 0) {
(void) fprintf(stderr, "raw (-b) format with "
"any other options\n");
usage();
@@ -707,10 +868,52 @@ main(int argc, char **argv)
return (1);
if (!error)
error = EIO;
+ } else if (record.zi_func[0] != '\0') {
+ if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+ level != 0 || device != NULL || record.zi_duration != 0) {
+ (void) fprintf(stderr, "panic (-p) incompatible with "
+ "other options\n");
+ usage();
+ return (2);
+ }
+
+ if (argc < 1 || argc > 2) {
+ (void) fprintf(stderr, "panic (-p) injection requires "
+ "a single pool name and an optional id\n");
+ usage();
+ return (2);
+ }
+
+ (void) strcpy(pool, argv[0]);
+ if (argv[1] != NULL)
+ record.zi_type = atoi(argv[1]);
+ dataset[0] = '\0';
+ } else if (record.zi_duration != 0) {
+ if (nowrites == 0) {
+ (void) fprintf(stderr, "-s or -g meaningless "
+ "without -I (ignore writes)\n");
+ usage();
+ return (2);
+ } else if (dur_secs && dur_txg) {
+ (void) fprintf(stderr, "choose a duration either "
+ "in seconds (-s) or a number of txgs (-g) "
+ "but not both\n");
+ usage();
+ return (2);
+ } else if (argc != 1) {
+ (void) fprintf(stderr, "ignore writes (-I) "
+ "injection requires a single pool name\n");
+ usage();
+ return (2);
+ }
+
+ (void) strcpy(pool, argv[0]);
+ dataset[0] = '\0';
} else if (type == TYPE_INVAL) {
if (flags == 0) {
(void) fprintf(stderr, "at least one of '-b', '-d', "
- "'-t', '-a', or '-u' must be specified\n");
+ "'-t', '-a', '-p', '-I' or '-u' "
+ "must be specified\n");
usage();
return (2);
}
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.h b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
index adc3efe80400..46fdcad8b31f 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.h
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZINJECT_H
#define _ZINJECT_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
@@ -41,11 +38,13 @@ typedef enum {
TYPE_MOSDIR, /* MOS object directory */
TYPE_METASLAB, /* metaslab objects */
TYPE_CONFIG, /* MOS config */
- TYPE_BPLIST, /* block pointer list */
+ TYPE_BPOBJ, /* block pointer list */
TYPE_SPACEMAP, /* space map objects */
TYPE_ERRLOG, /* persistent error log */
TYPE_LABEL_UBERBLOCK, /* label specific uberblock */
TYPE_LABEL_NVLIST, /* label specific nvlist */
+ TYPE_LABEL_PAD1, /* label specific 8K pad1 area */
+ TYPE_LABEL_PAD2, /* label specific 8K pad2 area */
TYPE_INVAL
} err_type_t;
diff --git a/cddl/contrib/opensolaris/cmd/zlook/zlook.c b/cddl/contrib/opensolaris/cmd/zlook/zlook.c
new file mode 100644
index 000000000000..29a6559f9023
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zlook/zlook.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This is a test program that uses ioctls to the ZFS Unit Test driver
+ * to perform readdirs or lookups using flags not normally available
+ * to user-land programs. This allows testing of the flags'
+ * behavior outside of a complicated consumer, such as the SMB driver.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/dirent.h>
+#include <sys/attr.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <string.h>
+#include <time.h>
+
+#define _KERNEL
+
+#include <sys/fs/zut.h>
+#include <sys/extdirent.h>
+
+#undef _KERNEL
+
+#define MAXBUF (64 * 1024)
+#define BIGBUF 4096
+#define LILBUF (sizeof (dirent_t))
+
+#define DIRENT_NAMELEN(reclen) \
+ ((reclen) - (offsetof(dirent_t, d_name[0])))
+
+static void
+usage(char *pnam)
+{
+ (void) fprintf(stderr, "Usage:\n %s -l [-is] dir-to-look-in "
+ "file-in-dir [xfile-on-file]\n", pnam);
+ (void) fprintf(stderr, " %s -i [-ls] dir-to-look-in "
+ "file-in-dir [xfile-on-file]\n", pnam);
+ (void) fprintf(stderr, " %s -s [-il] dir-to-look-in "
+ "file-in-dir [xfile-on-file]\n", pnam);
+ (void) fprintf(stderr, "\t Perform a lookup\n");
+ (void) fprintf(stderr, "\t -l == lookup\n");
+ (void) fprintf(stderr, "\t -i == request FIGNORECASE\n");
+ (void) fprintf(stderr, "\t -s == request stat(2) and xvattr info\n");
+ (void) fprintf(stderr, " %s -r [-ea] [-b buffer-size-in-bytes] "
+ "dir-to-look-in [file-in-dir]\n", pnam);
+ (void) fprintf(stderr, " %s -e [-ra] [-b buffer-size-in-bytes] "
+ "dir-to-look-in [file-in-dir]\n", pnam);
+ (void) fprintf(stderr, " %s -a [-re] [-b buffer-size-in-bytes] "
+ "dir-to-look-in [file-in-dir]\n", pnam);
+ (void) fprintf(stderr, "\t Perform a readdir\n");
+ (void) fprintf(stderr, "\t -r == readdir\n");
+ (void) fprintf(stderr, "\t -e == request extended entries\n");
+ (void) fprintf(stderr, "\t -a == request access filtering\n");
+ (void) fprintf(stderr, "\t -b == buffer size (default 4K)\n");
+ (void) fprintf(stderr, " %s -A path\n", pnam);
+ (void) fprintf(stderr, "\t Look up _PC_ACCESS_FILTERING "
+ "for path with pathconf(2)\n");
+ (void) fprintf(stderr, " %s -E path\n", pnam);
+ (void) fprintf(stderr, "\t Look up _PC_SATTR_EXISTS "
+ "for path with pathconf(2)\n");
+ (void) fprintf(stderr, " %s -S path\n", pnam);
+ (void) fprintf(stderr, "\t Look up _PC_SATTR_EXISTS "
+ "for path with pathconf(2)\n");
+ exit(EINVAL);
+}
+
+static void
+print_extd_entries(zut_readdir_t *r)
+{
+ struct edirent *eodp;
+ char *bufstart;
+
+ eodp = (edirent_t *)(uintptr_t)r->zr_buf;
+ bufstart = (char *)eodp;
+ while ((char *)eodp < bufstart + r->zr_bytes) {
+ char *blanks = " ";
+ int i = 0;
+ while (i < EDIRENT_NAMELEN(eodp->ed_reclen)) {
+ if (!eodp->ed_name[i])
+ break;
+ (void) printf("%c", eodp->ed_name[i++]);
+ }
+ if (i < 16)
+ (void) printf("%.*s", 16 - i, blanks);
+ (void) printf("\t%x\n", eodp->ed_eflags);
+ eodp = (edirent_t *)((intptr_t)eodp + eodp->ed_reclen);
+ }
+}
+
+static void
+print_entries(zut_readdir_t *r)
+{
+ dirent64_t *dp;
+ char *bufstart;
+
+ dp = (dirent64_t *)(intptr_t)r->zr_buf;
+ bufstart = (char *)dp;
+ while ((char *)dp < bufstart + r->zr_bytes) {
+ int i = 0;
+ while (i < DIRENT_NAMELEN(dp->d_reclen)) {
+ if (!dp->d_name[i])
+ break;
+ (void) printf("%c", dp->d_name[i++]);
+ }
+ (void) printf("\n");
+ dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
+ }
+}
+
+static void
+print_stats(struct stat64 *sb)
+{
+ char timebuf[512];
+
+ (void) printf("st_mode\t\t\t%04lo\n", (unsigned long)sb->st_mode);
+ (void) printf("st_ino\t\t\t%llu\n", (unsigned long long)sb->st_ino);
+ (void) printf("st_nlink\t\t%lu\n", (unsigned long)sb->st_nlink);
+ (void) printf("st_uid\t\t\t%d\n", sb->st_uid);
+ (void) printf("st_gid\t\t\t%d\n", sb->st_gid);
+ (void) printf("st_size\t\t\t%lld\n", (long long)sb->st_size);
+ (void) printf("st_blksize\t\t%ld\n", (long)sb->st_blksize);
+ (void) printf("st_blocks\t\t%lld\n", (long long)sb->st_blocks);
+
+ timebuf[0] = 0;
+ if (ctime_r(&sb->st_atime, timebuf, 512)) {
+ (void) printf("st_atime\t\t");
+ (void) printf("%s", timebuf);
+ }
+ timebuf[0] = 0;
+ if (ctime_r(&sb->st_mtime, timebuf, 512)) {
+ (void) printf("st_mtime\t\t");
+ (void) printf("%s", timebuf);
+ }
+ timebuf[0] = 0;
+ if (ctime_r(&sb->st_ctime, timebuf, 512)) {
+ (void) printf("st_ctime\t\t");
+ (void) printf("%s", timebuf);
+ }
+}
+
+static void
+print_xvs(uint64_t xvs)
+{
+ uint_t bits;
+ int idx = 0;
+
+ if (xvs == 0)
+ return;
+
+ (void) printf("-------------------\n");
+ (void) printf("Attribute bit(s) set:\n");
+ (void) printf("-------------------\n");
+
+ bits = xvs & ((1 << F_ATTR_ALL) - 1);
+ while (bits) {
+ uint_t rest = bits >> 1;
+ if (bits & 1) {
+ (void) printf("%s", attr_to_name((f_attr_t)idx));
+ if (rest)
+ (void) printf(", ");
+ }
+ idx++;
+ bits = rest;
+ }
+ (void) printf("\n");
+}
+
+int
+main(int argc, char **argv)
+{
+ zut_lookup_t lk = {0};
+ zut_readdir_t rd = {0};
+ boolean_t checking = B_FALSE;
+ boolean_t looking = B_FALSE;
+ boolean_t reading = B_FALSE;
+ boolean_t bflag = B_FALSE;
+ long rddir_bufsize = BIGBUF;
+ int error = 0;
+ int check;
+ int fd;
+ int c;
+
+ while ((c = getopt(argc, argv, "lisaerb:ASE")) != -1) {
+ switch (c) {
+ case 'l':
+ looking = B_TRUE;
+ break;
+ case 'i':
+ lk.zl_reqflags |= ZUT_IGNORECASE;
+ looking = B_TRUE;
+ break;
+ case 's':
+ lk.zl_reqflags |= ZUT_GETSTAT;
+ looking = B_TRUE;
+ break;
+ case 'a':
+ rd.zr_reqflags |= ZUT_ACCFILTER;
+ reading = B_TRUE;
+ break;
+ case 'e':
+ rd.zr_reqflags |= ZUT_EXTRDDIR;
+ reading = B_TRUE;
+ break;
+ case 'r':
+ reading = B_TRUE;
+ break;
+ case 'b':
+ reading = B_TRUE;
+ bflag = B_TRUE;
+ rddir_bufsize = strtol(optarg, NULL, 0);
+ break;
+ case 'A':
+ checking = B_TRUE;
+ check = _PC_ACCESS_FILTERING;
+ break;
+ case 'S':
+ checking = B_TRUE;
+ check = _PC_SATTR_ENABLED;
+ break;
+ case 'E':
+ checking = B_TRUE;
+ check = _PC_SATTR_EXISTS;
+ break;
+ case '?':
+ default:
+ usage(argv[0]); /* no return */
+ }
+ }
+
+ if ((checking && looking) || (checking && reading) ||
+ (looking && reading) || (!reading && bflag) ||
+ (!checking && !reading && !looking))
+ usage(argv[0]); /* no return */
+
+ if (rddir_bufsize < LILBUF || rddir_bufsize > MAXBUF) {
+ (void) fprintf(stderr, "Sorry, buffer size "
+ "must be >= %d and less than or equal to %d bytes.\n",
+ (int)LILBUF, MAXBUF);
+ exit(EINVAL);
+ }
+
+ if (checking) {
+ char pathbuf[MAXPATHLEN];
+ long result;
+
+ if (argc - optind < 1)
+ usage(argv[0]); /* no return */
+ (void) strlcpy(pathbuf, argv[optind], MAXPATHLEN);
+ result = pathconf(pathbuf, check);
+ (void) printf("pathconf(2) check for %s\n", pathbuf);
+ switch (check) {
+ case _PC_SATTR_ENABLED:
+ (void) printf("System attributes ");
+ if (result != 0)
+ (void) printf("Enabled\n");
+ else
+ (void) printf("Not enabled\n");
+ break;
+ case _PC_SATTR_EXISTS:
+ (void) printf("System attributes ");
+ if (result != 0)
+ (void) printf("Exist\n");
+ else
+ (void) printf("Do not exist\n");
+ break;
+ case _PC_ACCESS_FILTERING:
+ (void) printf("Access filtering ");
+ if (result != 0)
+ (void) printf("Available\n");
+ else
+ (void) printf("Not available\n");
+ break;
+ }
+ return (result);
+ }
+
+ if ((fd = open(ZUT_DEV, O_RDONLY)) < 0) {
+ perror(ZUT_DEV);
+ return (ENXIO);
+ }
+
+ if (reading) {
+ char *buf;
+
+ if (argc - optind < 1)
+ usage(argv[0]); /* no return */
+
+ (void) strlcpy(rd.zr_dir, argv[optind], MAXPATHLEN);
+ if (argc - optind > 1) {
+ (void) strlcpy(rd.zr_file, argv[optind + 1],
+ MAXNAMELEN);
+ rd.zr_reqflags |= ZUT_XATTR;
+ }
+
+ if ((buf = malloc(rddir_bufsize)) == NULL) {
+ error = errno;
+ perror("malloc");
+ (void) close(fd);
+ return (error);
+ }
+
+ rd.zr_buf = (uint64_t)(uintptr_t)buf;
+ rd.zr_buflen = rddir_bufsize;
+
+ while (!rd.zr_eof) {
+ int ierr;
+
+ if ((ierr = ioctl(fd, ZUT_IOC_READDIR, &rd)) != 0) {
+ (void) fprintf(stderr,
+ "IOCTL error: %s (%d)\n",
+ strerror(ierr), ierr);
+ free(buf);
+ (void) close(fd);
+ return (ierr);
+ }
+ if (rd.zr_retcode) {
+ (void) fprintf(stderr,
+ "readdir result: %s (%d)\n",
+ strerror(rd.zr_retcode), rd.zr_retcode);
+ free(buf);
+ (void) close(fd);
+ return (rd.zr_retcode);
+ }
+ if (rd.zr_reqflags & ZUT_EXTRDDIR)
+ print_extd_entries(&rd);
+ else
+ print_entries(&rd);
+ }
+ free(buf);
+ } else {
+ int ierr;
+
+ if (argc - optind < 2)
+ usage(argv[0]); /* no return */
+
+ (void) strlcpy(lk.zl_dir, argv[optind], MAXPATHLEN);
+ (void) strlcpy(lk.zl_file, argv[optind + 1], MAXNAMELEN);
+ if (argc - optind > 2) {
+ (void) strlcpy(lk.zl_xfile,
+ argv[optind + 2], MAXNAMELEN);
+ lk.zl_reqflags |= ZUT_XATTR;
+ }
+
+ if ((ierr = ioctl(fd, ZUT_IOC_LOOKUP, &lk)) != 0) {
+ (void) fprintf(stderr,
+ "IOCTL error: %s (%d)\n",
+ strerror(ierr), ierr);
+ (void) close(fd);
+ return (ierr);
+ }
+
+ (void) printf("\nLookup of ");
+ if (lk.zl_reqflags & ZUT_XATTR) {
+ (void) printf("extended attribute \"%s\" of ",
+ lk.zl_xfile);
+ }
+ (void) printf("file \"%s\" ", lk.zl_file);
+ (void) printf("in directory \"%s\" ", lk.zl_dir);
+ if (lk.zl_retcode) {
+ (void) printf("failed: %s (%d)\n",
+ strerror(lk.zl_retcode), lk.zl_retcode);
+ (void) close(fd);
+ return (lk.zl_retcode);
+ }
+
+ (void) printf("succeeded.\n");
+ if (lk.zl_reqflags & ZUT_IGNORECASE) {
+ (void) printf("----------------------------\n");
+ (void) printf("dirent flags: 0x%0x\n", lk.zl_deflags);
+ (void) printf("real name: %s\n", lk.zl_real);
+ }
+ if (lk.zl_reqflags & ZUT_GETSTAT) {
+ (void) printf("----------------------------\n");
+ print_stats(&lk.zl_statbuf);
+ print_xvs(lk.zl_xvattrs);
+ }
+ }
+
+ (void) close(fd);
+ return (0);
+}
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
index b6c97c1a5eff..ff71dff16c32 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
@@ -1,9 +1,9 @@
'\" te
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
-.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
-.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH zpool 1M "5 Mar 2009" "SunOS 5.11" "System Administration Commands"
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH zpool 1M "21 Sep 2009" "SunOS 5.11" "System Administration Commands"
.SH NAME
zpool \- configures ZFS storage pools
.SH SYNOPSIS
@@ -14,125 +14,125 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR]
- ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...
+\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...
.fi
.LP
.nf
-\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR
+\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR
.fi
.LP
.nf
-\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...
+\fBzpool clear\fR \fIpool\fR [\fIdevice\fR]
.fi
.LP
.nf
-\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
+\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR]
+ ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...
.fi
.LP
.nf
-\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ...
+\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR
.fi
.LP
.nf
-\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
+\fBzpool detach\fR \fIpool\fR \fIdevice\fR
.fi
.LP
.nf
-\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
+\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
+\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
+\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool clear\fR \fIpool\fR [\fIdevice\fR]
+\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
.fi
.LP
.nf
-\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR
+\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+ [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fB-a\fR
.fi
.LP
.nf
-\fBzpool detach\fR \fIpool\fR \fIdevice\fR
+\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+ [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR |\fIid\fR [\fInewpool\fR]
.fi
.LP
.nf
-\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR [\fInew_device\fR]
+\fBzpool iostat\fR [\fB-T\fR u | d ] [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
.fi
.LP
.nf
-\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...
+\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
+\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-p\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
- [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fB-a\fR
+\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
- [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR |\fIid\fR [\fInewpool\fR]
+\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...
+\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR [\fInew_device\fR]
.fi
.LP
.nf
-\fBzpool upgrade\fR
+\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool upgrade\fR \fB-v\fR
+\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR
.fi
.LP
.nf
-\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...
+\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...
+\fBzpool upgrade\fR
.fi
.LP
.nf
-\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
+\fBzpool upgrade\fR \fB-v\fR
.fi
.LP
.nf
-\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR
+\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...
.fi
.SH DESCRIPTION
@@ -141,8 +141,8 @@ zpool \- configures ZFS storage pools
The \fBzpool\fR command configures \fBZFS\fR storage pools. A storage pool is a collection of devices that provides physical storage and data replication for \fBZFS\fR datasets.
.sp
.LP
-All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets.
-.SS "Virtual Devices (vdevs)"
+All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets.
+.SS "Virtual Devices (\fBvdev\fRs)"
.sp
.LP
A "virtual device" describes a single device or a collection of devices organized according to certain performance and fault characteristics. The following virtual devices are supported:
@@ -150,18 +150,18 @@ A "virtual device" describes a single device or a collection of devices organize
.ne 2
.mk
.na
-\fBdisk\fR
+\fB\fBdisk\fR\fR
.ad
.RS 10n
.rt
-A block device, typically located under "/dev/dsk". \fBZFS\fR can use individual slices or partitions, though the recommended mode of operation is to use whole disks. A disk can be specified by a full path, or it can be a shorthand name (the relative portion of the path under "/dev/dsk"). A whole disk can be specified by omitting the slice or partition designation. For example, "c0t0d0" is equivalent to "/dev/dsk/c0t0d0s2". When given a whole disk, \fBZFS\fR automatically labels the disk, if necessary.
+A block device, typically located under \fB/dev/dsk\fR. \fBZFS\fR can use individual slices or partitions, though the recommended mode of operation is to use whole disks. A disk can be specified by a full path, or it can be a shorthand name (the relative portion of the path under "/dev/dsk"). A whole disk can be specified by omitting the slice or partition designation. For example, "c0t0d0" is equivalent to "/dev/dsk/c0t0d0s2". When given a whole disk, \fBZFS\fR automatically labels the disk, if necessary.
.RE
.sp
.ne 2
.mk
.na
-\fBfile\fR
+\fB\fBfile\fR\fR
.ad
.RS 10n
.rt
@@ -172,7 +172,7 @@ A regular file. The use of files as a backing store is strongly discouraged. It
.ne 2
.mk
.na
-\fBmirror\fR
+\fB\fBmirror\fR\fR
.ad
.RS 10n
.rt
@@ -183,21 +183,25 @@ A mirror of two or more devices. Data is replicated in an identical fashion acro
.ne 2
.mk
.na
-\fBraidz\fR
+\fB\fBraidz\fR\fR
+.ad
+.br
+.na
+\fB\fBraidz1\fR\fR
.ad
.br
.na
-\fBraidz1\fR
+\fB\fBraidz2\fR\fR
.ad
.br
.na
-\fBraidz2\fR
+\fB\fBraidz3\fR\fR
.ad
.RS 10n
.rt
A variation on \fBRAID-5\fR that allows for better distribution of parity and eliminates the "\fBRAID-5\fR write hole" (in which data and parity become inconsistent after a power loss). Data and parity is striped across all disks within a \fBraidz\fR group.
.sp
-A \fBraidz\fR group can have either single- or double-parity, meaning that the \fBraidz\fR group can sustain one or two failures respectively without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group and the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
+A \fBraidz\fR group can have single-, double- , or triple parity, meaning that the \fBraidz\fR group can sustain one, two, or three failures, respectively, without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group; the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group; and the \fBraidz3\fR \fBvdev\fR type specifies a triple-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
.sp
A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks can hold approximately (\fIN-P\fR)*\fIX\fR bytes and can withstand \fIP\fR device(s) failing before data integrity is compromised. The minimum number of devices in a \fBraidz\fR group is one more than the number of parity disks. The recommended number is between 3 and 9 to help increase performance.
.RE
@@ -206,7 +210,7 @@ A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks
.ne 2
.mk
.na
-\fBspare\fR
+\fB\fBspare\fR\fR
.ad
.RS 10n
.rt
@@ -217,22 +221,22 @@ A special pseudo-\fBvdev\fR which keeps track of available hot spares for a pool
.ne 2
.mk
.na
-\fBlog\fR
+\fB\fBlog\fR\fR
.ad
.RS 10n
.rt
-A separate intent log device. If more than one log device is specified, then writes are load-balanced between devices. Log devices can be mirrored. However, \fBraidz\fR and \fBraidz2\fR are not supported for the intent log. For more information, see the "Intent Log" section.
+A separate-intent log device. If more than one log device is specified, then writes are load-balanced between devices. Log devices can be mirrored. However, \fBraidz\fR \fBvdev\fR types are not supported for the intent log. For more information, see the "Intent Log" section.
.RE
.sp
.ne 2
.mk
.na
-\fBcache\fR
+\fB\fBcache\fR\fR
.ad
.RS 10n
.rt
-A device used to cache storage pool data. A cache device cannot be mirrored or part of a \fBraidz\fR or \fBraidz2\fR configuration. For more information, see the "Cache Devices" section.
+A device used to cache storage pool data. A cache device cannot be cannot be configured as a mirror or \fBraidz\fR group. For more information, see the "Cache Devices" section.
.RE
.sp
@@ -247,7 +251,7 @@ Virtual devices are specified one at a time on the command line, separated by wh
.sp
.in +2
.nf
-\fB# zpool create mypool mirror c0t0d0 c0t1d0 mirror c1t0d0 c1t1d0\fR
+# \fBzpool create mypool mirror c0t0d0 c0t1d0 mirror c1t0d0 c1t1d0\fR
.fi
.in -2
.sp
@@ -403,7 +407,7 @@ The \fBZFS\fR Intent Log (\fBZIL\fR) satisfies \fBPOSIX\fR requirements for sync
Multiple log devices can also be specified, and they can be mirrored. See the EXAMPLES section for an example of mirroring multiple log devices.
.sp
.LP
-Log devices can be added, replaced, attached, detached, and imported and exported as part of the larger pool.
+Log devices can be added, replaced, attached, detached, and imported and exported as part of the larger pool. Mirrored log devices can be removed by specifying the top-level mirror for the log.
.SS "Cache Devices"
.sp
.LP
@@ -433,7 +437,7 @@ Each pool has several properties associated with it. Some properties are read-on
.ne 2
.mk
.na
-\fBavailable\fR
+\fB\fBavailable\fR\fR
.ad
.RS 20n
.rt
@@ -444,7 +448,7 @@ Amount of storage available within the pool. This property can also be referred
.ne 2
.mk
.na
-\fBcapacity\fR
+\fB\fBcapacity\fR\fR
.ad
.RS 20n
.rt
@@ -455,7 +459,7 @@ Percentage of pool space used. This property can also be referred to by its shor
.ne 2
.mk
.na
-\fBhealth\fR
+\fB\fBhealth\fR\fR
.ad
.RS 20n
.rt
@@ -466,7 +470,7 @@ The current health of the pool. Health can be "\fBONLINE\fR", "\fBDEGRADED\fR",
.ne 2
.mk
.na
-\fBguid\fR
+\fB\fBguid\fR\fR
.ad
.RS 20n
.rt
@@ -477,7 +481,7 @@ A unique identifier for the pool.
.ne 2
.mk
.na
-\fBsize\fR
+\fB\fBsize\fR\fR
.ad
.RS 20n
.rt
@@ -488,7 +492,7 @@ Total size of the storage pool.
.ne 2
.mk
.na
-\fBused\fR
+\fB\fBused\fR\fR
.ad
.RS 20n
.rt
@@ -514,12 +518,23 @@ Alternate root directory. If set, this directory is prepended to any mount point
.sp
.LP
-The following properties can be set at creation time and import time, and later changed with the "\fBzpool set\fR" command:
+The following properties can be set at creation time and import time, and later changed with the \fBzpool set\fR command:
.sp
.ne 2
.mk
.na
-\fB\fBautoreplace\fR=on | off\fR
+\fB\fBautoexpand\fR=\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls automatic pool expansion when the underlying LUN is grown. If set to \fBon\fR, the pool will be resized according to the size of the expanded device. If the device is part of a mirror or \fBraidz\fR then all devices within that mirror/\fBraidz\fR group must be expanded before the new space is made available to the pool. The default behavior is \fBoff\fR. This property can also be referred to by its shortened column name, \fBexpand\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBautoreplace\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
@@ -541,7 +556,7 @@ Identifies the default bootable dataset for the root pool. This property is expe
.ne 2
.mk
.na
-\fB\fBcachefile\fR=\fIpath\fR | "none"\fR
+\fB\fBcachefile\fR=\fIpath\fR | \fBnone\fR\fR
.ad
.sp .6
.RS 4n
@@ -574,7 +589,7 @@ Controls the system behavior in the event of catastrophic pool failure. This con
.ne 2
.mk
.na
-\fBwait\fR
+\fB\fBwait\fR\fR
.ad
.RS 12n
.rt
@@ -585,7 +600,7 @@ Blocks all \fBI/O\fR access until the device connectivity is recovered and the e
.ne 2
.mk
.na
-\fBcontinue\fR
+\fB\fBcontinue\fR\fR
.ad
.RS 12n
.rt
@@ -596,7 +611,7 @@ Returns \fBEIO\fR to any new write \fBI/O\fR requests but allows reads to any of
.ne 2
.mk
.na
-\fBpanic\fR
+\fB\fBpanic\fR\fR
.ad
.RS 12n
.rt
@@ -613,7 +628,7 @@ Prints out a message to the console and generates a system crash dump.
.ad
.sp .6
.RS 4n
-Controls whether information about snapshots associated with this pool is output when "\fBzfs list\fR" is run without the \fB-t\fR option. The default value is "off".
+Controls whether information about snapshots associated with this pool is output when "\fBzfs list\fR" is run without the \fB-t\fR option. The default value is "off".
.RE
.sp
@@ -649,25 +664,19 @@ Displays a help message.
.ne 2
.mk
.na
-\fB\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...\fR
+\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...\fR
.ad
.sp .6
.RS 4n
-Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool names "mirror", "raidz", "spare" and "log" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
-.sp
-The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevents a device from ever being used by \fBZFS\fR. Other uses, such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
-.sp
-The command also checks that the replication strategy for the pool is consistent. An attempt to combine redundant and non-redundant storage in a single pool, or to mix disks and files, results in an error unless \fB-f\fR is specified. The use of differently sized devices within a single \fBraidz\fR or mirror group is also flagged as an error unless \fB-f\fR is specified.
-.sp
-Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR". The mount point must not exist or must be empty, or else the root dataset cannot be mounted. This can be overridden with the \fB-m\fR option.
+Adds the specified virtual devices to the given pool. The \fIvdev\fR specification is described in the "Virtual Devices" section. The behavior of the \fB-f\fR option, and the device checks performed are described in the "zpool create" subcommand.
.sp
.ne 2
.mk
.na
\fB\fB-f\fR\fR
.ad
-.sp .6
-.RS 4n
+.RS 6n
+.rt
Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
.RE
@@ -677,57 +686,32 @@ Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting r
.na
\fB\fB-n\fR\fR
.ad
-.sp .6
-.RS 4n
-Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIproperty=value\fR [\fB-o\fR \fIproperty=value\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Sets the given pool properties. See the "Properties" section for a list of valid properties that can be set.
+.RS 6n
+.rt
+Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
.RE
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-O\fR \fIfile-system-property=value\fR\fR
-.ad
-.br
-.na
-\fB[\fB-O\fR \fIfile-system-property=value\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Sets the given file system properties in the root file system of the pool. See the "Properties" section of \fBzfs\fR(1M) for a list of valid properties that can be set.
+Do not add a disk that is currently configured as a quorum device to a zpool. After a disk is in the pool, that disk can then be configured as a quorum device.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-R\fR \fIroot\fR\fR
+\fB\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR\fR
.ad
.sp .6
.RS 4n
-Equivalent to "-o cachefile=none,altroot=\fIroot\fR"
-.RE
-
+Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device cannot be part of a \fBraidz\fR configuration. If \fIdevice\fR is not currently part of a mirrored configuration, \fIdevice\fR automatically transforms into a two-way mirror of \fIdevice\fR and \fInew_device\fR. If \fIdevice\fR is part of a two-way mirror, attaching \fInew_device\fR creates a three-way mirror, and so on. In either case, \fInew_device\fR begins to resilver immediately.
.sp
.ne 2
.mk
.na
-\fB\fB-m\fR \fImountpoint\fR\fR
+\fB\fB-f\fR\fR
.ad
-.sp .6
-.RS 4n
-Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR" or "\fBaltroot\fR/\fIpool\fR" if \fBaltroot\fR is specified. The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount points, see \fBzfs\fR(1M).
+.RS 6n
+.rt
+Forces use of \fInew_device\fR, even if its appears to be in use. Not all devices can be overridden in this manner.
.RE
.RE
@@ -736,41 +720,36 @@ Sets the mount point for the root dataset. The default mount point is "/\fIpool\
.ne 2
.mk
.na
-\fB\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR\fR
+\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
.ad
.sp .6
.RS 4n
-Destroys the given pool, freeing up any devices for other use. This command tries to unmount any active datasets before destroying the pool.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forces any active datasets contained within the pool to be unmounted.
-.RE
-
+Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...\fR
+\fB\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...\fR
.ad
.sp .6
.RS 4n
-Adds the specified virtual devices to the given pool. The \fIvdev\fR specification is described in the "Virtual Devices" section. The behavior of the \fB-f\fR option, and the device checks performed are described in the "zpool create" subcommand.
+Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool names "mirror", "raidz", "spare" and "log" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
+.sp
+The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevents a device from ever being used by \fBZFS\fR. Other uses, such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
+.sp
+The command also checks that the replication strategy for the pool is consistent. An attempt to combine redundant and non-redundant storage in a single pool, or to mix disks and files, results in an error unless \fB-f\fR is specified. The use of differently sized devices within a single \fBraidz\fR or mirror group is also flagged as an error unless \fB-f\fR is specified.
+.sp
+Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR". The mount point must not exist or must be empty, or else the root dataset cannot be mounted. This can be overridden with the \fB-m\fR option.
.sp
.ne 2
.mk
.na
\fB\fB-f\fR\fR
.ad
-.RS 6n
-.rt
+.sp .6
+.RS 4n
Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
.RE
@@ -780,148 +759,79 @@ Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting r
.na
\fB\fB-n\fR\fR
.ad
-.RS 6n
-.rt
-Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
-.RE
-
-Do not add a disk that is currently configured as a quorum device to a zpool. After a disk is in the pool, that disk can then be configured as a quorum device.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...\fR
-.ad
.sp .6
.RS 4n
-Removes the specified device from the pool. This command currently only supports removing hot spares and cache devices. Devices that are part of a mirrored configuration can be removed using the "\fBzpool detach\fR" command. Non-redundant and \fBraidz\fR devices cannot be removed from a pool.
+Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR
+\fB\fB-o\fR \fIproperty=value\fR [\fB-o\fR \fIproperty=value\fR] ...\fR
.ad
.sp .6
.RS 4n
-Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-H\fR\fR
-.ad
-.RS 12n
-.rt
-Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
+Sets the given pool properties. See the "Properties" section for a list of valid properties that can be set.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-o\fR \fIprops\fR\fR
+\fB\fB-O\fR \fIfile-system-property=value\fR\fR
.ad
-.RS 12n
-.rt
-Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, capacity, health, altroot"
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
+.br
.na
-\fB\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]\fR
+\fB[\fB-O\fR \fIfile-system-property=value\fR] ...\fR
.ad
.sp .6
.RS 4n
-Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for every pool in the system is shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wide statistics.
-.RE
-
+Sets the given file system properties in the root file system of the pool. See the "Properties" section of \fBzfs\fR(1M) for a list of valid properties that can be set.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
+\fB\fB-R\fR \fIroot\fR\fR
.ad
.sp .6
.RS 4n
-Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the "Device Failure and Recovery" section.
-.sp
-If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-x\fR\fR
-.ad
-.RS 6n
-.rt
-Only display status for pools that are exhibiting errors or are otherwise unavailable.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub.
-.RE
-
+Equivalent to "-o cachefile=none,altroot=\fIroot\fR"
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool online\fR \fIpool\fR \fIdevice\fR ...\fR
+\fB\fB-m\fR \fImountpoint\fR\fR
.ad
.sp .6
.RS 4n
-Brings the specified physical device online.
-.sp
-This command is not applicable to spares or cache devices.
+Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR" or "\fBaltroot\fR/\fIpool\fR" if \fBaltroot\fR is specified. The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount points, see \fBzfs\fR(1M).
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
+\fB\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR\fR
.ad
.sp .6
.RS 4n
-Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
-.sp
-This command is not applicable to spares or cache devices.
+Destroys the given pool, freeing up any devices for other use. This command tries to unmount any active datasets before destroying the pool.
.sp
.ne 2
.mk
.na
-\fB\fB-t\fR\fR
+\fB\fB-f\fR\fR
.ad
.RS 6n
.rt
-Temporary. Upon reboot, the specified physical device reverts to its previous state.
+Forces any active datasets contained within the pool to be unmounted.
.RE
.RE
@@ -930,22 +840,26 @@ Temporary. Upon reboot, the specified physical device reverts to its previous st
.ne 2
.mk
.na
-\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
+\fB\fBzpool detach\fR \fIpool\fR \fIdevice\fR\fR
.ad
.sp .6
.RS 4n
-Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared.
+Detaches \fIdevice\fR from a mirror. The operation is refused if there are no other valid replicas of the data.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR\fR
+\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
-Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device cannot be part of a \fBraidz\fR configuration. If \fIdevice\fR is not currently part of a mirrored configuration, \fIdevice\fR automatically transforms into a two-way mirror of \fIdevice\fR and \fInew_device\fR. If \fIdevice\fR is part of a two-way mirror, attaching \fInew_device\fR creates a three-way mirror, and so on. In either case, \fInew_device\fR begins to resilver immediately.
+Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+.sp
+Before exporting the pool, all datasets within the pool are unmounted. A pool can not be exported if it has a shared spare that is currently being used.
+.sp
+For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
.sp
.ne 2
.mk
@@ -954,7 +868,9 @@ Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device
.ad
.RS 6n
.rt
-Forces use of \fInew_device\fR, even if its appears to be in use. Not all devices can be overridden in this manner.
+Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+.sp
+This command will forcefully export the pool even if it has a shared spare that is currently being used. This may lead to potential data corruption.
.RE
.RE
@@ -963,61 +879,54 @@ Forces use of \fInew_device\fR, even if its appears to be in use. Not all device
.ne 2
.mk
.na
-\fB\fBzpool detach\fR \fIpool\fR \fIdevice\fR\fR
+\fB\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
-Detaches \fIdevice\fR from a mirror. The operation is refused if there are no other valid replicas of the data.
+Retrieves the given list of properties (or all properties if "\fBall\fR" is used) for the specified storage pool(s). These properties are displayed with the following fields:
+.sp
+.in +2
+.nf
+ name Name of storage pool
+ property Property name
+ value Property value
+ source Property source, either 'default' or 'local'.
+.fi
+.in -2
+.sp
+
+See the "Properties" section for more information on the available pool properties.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIold_device\fR [\fInew_device\fR]\fR
+\fB\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...\fR
.ad
.sp .6
.RS 4n
-Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching \fInew_device\fR, waiting for it to resilver, and then detaching \fIold_device\fR.
-.sp
-The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
-.sp
-\fInew_device\fR is required if the pool is not redundant. If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
+Displays the command history of the specified pools or all pools if no pool is specified.
.sp
.ne 2
.mk
.na
-\fB\fB-f\fR\fR
+\fB\fB-i\fR\fR
.ad
.RS 6n
.rt
-Forces use of \fInew_device\fR, even if its appears to be in use. Not all devices can be overridden in this manner.
-.RE
-
+Displays internally logged \fBZFS\fR events in addition to user initiated events.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Begins a scrub. The scrub examines all data in the specified pools to verify that it checksums correctly. For replicated (mirror or \fBraidz\fR) devices, \fBZFS\fR automatically repairs any damage discovered during the scrub. The "\fBzpool status\fR" command reports the progress of the scrub and summarizes the results of the scrub upon completion.
-.sp
-Scrubbing and resilvering are very similar operations. The difference is that resilvering only examines data that \fBZFS\fR knows to be out of date (for example, when attaching a new device to a mirror or replacing an existing device), whereas scrubbing examines all data to discover silent errors due to hardware faults or disk failure.
-.sp
-Because scrubbing and resilvering are \fBI/O\fR-intensive operations, \fBZFS\fR only allows one at a time. If a scrub is already in progress, the "\fBzpool scrub\fR" command terminates it and starts a new scrub. If a resilver is in progress, \fBZFS\fR does not allow a scrub to be started until the resilver completes.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-s\fR\fR
+\fB\fB-l\fR\fR
.ad
.RS 6n
.rt
-Stop scrubbing.
+Displays log records in long format, which in addition to standard format includes, the user name, the hostname, and the zone in which the operation was performed.
.RE
.RE
@@ -1261,26 +1170,66 @@ Sets the "\fBcachefile\fR" property to "\fBnone\fR" and the "\fIaltroot\fR" prop
.ne 2
.mk
.na
-\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
+\fB\fBzpool iostat\fR [\fB-T\fR \fBu\fR | \fBd\fR] [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]\fR
.ad
.sp .6
.RS 4n
-Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for every pool in the system is shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
.sp
-Before exporting the pool, all datasets within the pool are unmounted. A pool can not be exported if it has a shared spare that is currently being used.
+.ne 2
+.mk
+.na
+\fB\fB-T\fR \fBu\fR | \fBd\fR\fR
+.ad
+.RS 12n
+.rt
+Display a time stamp.
.sp
-For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
+Specify \fBu\fR for a printed representation of the internal representation of time. See \fBtime\fR(2). Specify \fBd\fR for standard date format. See \fBdate\fR(1).
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-f\fR\fR
+\fB\fB-v\fR\fR
.ad
-.RS 6n
+.RS 12n
.rt
-Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wide statistics.
+.RE
+
+.RE
+
.sp
-This command will forcefully export the pool even if it has a shared spare that is currently being used. This may lead to potential data corruption.
+.ne 2
+.mk
+.na
+\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.RS 12n
+.rt
+Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIprops\fR\fR
+.ad
+.RS 12n
+.rt
+Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, capacity, health, altroot"
.RE
.RE
@@ -1289,53 +1238,109 @@ This command will forcefully export the pool even if it has a shared spare that
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR\fR
+\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
.ad
.sp .6
.RS 4n
-Displays all pools formatted using a different \fBZFS\fR on-disk version. Older versions can continue to be used, but some features may not be available. These pools can be upgraded using "\fBzpool upgrade -a\fR". Pools that are formatted with a more recent version are also displayed, although these pools will be inaccessible on the system.
+Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
+.sp
+This command is not applicable to spares or cache devices.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-t\fR\fR
+.ad
+.RS 6n
+.rt
+Temporary. Upon reboot, the specified physical device reverts to its previous state.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR \fB-v\fR\fR
+\fB\fBzpool online\fR [\fB-e\fR] \fIpool\fR \fIdevice\fR...\fR
.ad
.sp .6
.RS 4n
-Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supported versions are displayed, along with an explanation of the features provided with each version.
+Brings the specified physical device online.
+.sp
+This command is not applicable to spares or cache devices.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-e\fR\fR
+.ad
+.RS 6n
+.rt
+Expand the device to use all available space. If the device is part of a mirror or \fBraidz\fR then all devices must be expanded before the new space will become available to the pool.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...\fR
+\fB\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...\fR
.ad
.sp .6
.RS 4n
-Upgrades the given pool to the latest on-disk version. Once this is done, the pool will no longer be accessible on systems running older versions of the software.
+Removes the specified device from the pool. This command currently only supports removing hot spares, cache, and log devices. A mirrored log device can be removed by specifying the top-level mirror for the log. Non-log devices that are part of a mirrored configuration can be removed using the \fBzpool detach\fR command. Non-redundant and \fBraidz\fR devices cannot be removed from a pool.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-a\fR\fR
+\fB\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIold_device\fR [\fInew_device\fR]\fR
.ad
-.RS 14n
+.sp .6
+.RS 4n
+Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching \fInew_device\fR, waiting for it to resilver, and then detaching \fIold_device\fR.
+.sp
+The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
+.sp
+\fInew_device\fR is required if the pool is not redundant. If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
.rt
-Upgrades all pools.
+Forces use of \fInew_device\fR, even if its appears to be in use. Not all devices can be overridden in this manner.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-V\fR \fIversion\fR\fR
+\fB\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...\fR
.ad
-.RS 14n
+.sp .6
+.RS 4n
+Begins a scrub. The scrub examines all data in the specified pools to verify that it checksums correctly. For replicated (mirror or \fBraidz\fR) devices, \fBZFS\fR automatically repairs any damage discovered during the scrub. The "\fBzpool status\fR" command reports the progress of the scrub and summarizes the results of the scrub upon completion.
+.sp
+Scrubbing and resilvering are very similar operations. The difference is that resilvering only examines data that \fBZFS\fR knows to be out of date (for example, when attaching a new device to a mirror or replacing an existing device), whereas scrubbing examines all data to discover silent errors due to hardware faults or disk failure.
+.sp
+Because scrubbing and resilvering are \fBI/O\fR-intensive operations, \fBZFS\fR only allows one at a time. If a scrub is already in progress, the "\fBzpool scrub\fR" command terminates it and starts a new scrub. If a resilver is in progress, \fBZFS\fR does not allow a scrub to be started until the resilver completes.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR\fR
+.ad
+.RS 6n
.rt
-Upgrade to the specified version. If the \fB-V\fR flag is not specified, the pool is upgraded to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software.
+Stop scrubbing.
.RE
.RE
@@ -1344,31 +1349,44 @@ Upgrade to the specified version. If the \fB-V\fR flag is not specified, the poo
.ne 2
.mk
.na
-\fB\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...\fR
+\fB\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR\fR
.ad
.sp .6
.RS 4n
-Displays the command history of the specified pools or all pools if no pool is specified.
+Sets the given property on the specified pool. See the "Properties" section for more information on what properties can be set and acceptable values.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-i\fR\fR
+\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the "Device Failure and Recovery" section.
+.sp
+If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-x\fR\fR
.ad
.RS 6n
.rt
-Displays internally logged \fBZFS\fR events in addition to user initiated events.
+Only display status for pools that are exhibiting errors or are otherwise unavailable.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-l\fR\fR
+\fB\fB-v\fR\fR
.ad
.RS 6n
.rt
-Displays log records in long format, which in addition to standard format includes, the user name, the hostname, and the zone in which the operation was performed.
+Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub.
.RE
.RE
@@ -1377,34 +1395,55 @@ Displays log records in long format, which in addition to standard format includ
.ne 2
.mk
.na
-\fB\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
+\fB\fBzpool upgrade\fR\fR
.ad
.sp .6
.RS 4n
-Retrieves the given list of properties (or all properties if "\fBall\fR" is used) for the specified storage pool(s). These properties are displayed with the following fields:
-.sp
-.in +2
-.nf
- name Name of storage pool
- property Property name
- value Property value
- source Property source, either 'default' or 'local'.
-.fi
-.in -2
-.sp
+Displays all pools formatted using a different \fBZFS\fR on-disk version. Older versions can continue to be used, but some features may not be available. These pools can be upgraded using "\fBzpool upgrade -a\fR". Pools that are formatted with a more recent version are also displayed, although these pools will be inaccessible on the system.
+.RE
-See the "Properties" section for more information on the available pool properties.
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool upgrade\fR \fB-v\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supported versions are displayed, along with an explanation of the features provided with each version.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR\fR
+\fB\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
-Sets the given property on the specified pool. See the "Properties" section for more information on what properties can be set and acceptable values.
+Upgrades the given pool to the latest on-disk version. Once this is done, the pool will no longer be accessible on systems running older versions of the software.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-a\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrades all pools.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-V\fR \fIversion\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrade to the specified version. If the \fB-V\fR flag is not specified, the pool is upgraded to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software.
+.RE
+
.RE
.SH EXAMPLES
@@ -1417,7 +1456,7 @@ The following command creates a pool with a single \fBraidz\fR root \fIvdev\fR t
.sp
.in +2
.nf
-\fB# zpool create tank raidz c0t0d0 c0t1d0 c0t2d0 c0t3d0 c0t4d0 c0t5d0\fR
+# \fBzpool create tank raidz c0t0d0 c0t1d0 c0t2d0 c0t3d0 c0t4d0 c0t5d0\fR
.fi
.in -2
.sp
@@ -1431,7 +1470,7 @@ The following command creates a pool with two mirrors, where each mirror contain
.sp
.in +2
.nf
-\fB# zpool create tank mirror c0t0d0 c0t1d0 mirror c0t2d0 c0t3d0\fR
+# \fBzpool create tank mirror c0t0d0 c0t1d0 mirror c0t2d0 c0t3d0\fR
.fi
.in -2
.sp
@@ -1445,7 +1484,7 @@ The following command creates an unmirrored pool using two disk slices.
.sp
.in +2
.nf
-\fB# zpool create tank /dev/dsk/c0t0d0s1 c0t1d0s4\fR
+# \fBzpool create tank /dev/dsk/c0t0d0s1 c0t1d0s4\fR
.fi
.in -2
.sp
@@ -1459,7 +1498,7 @@ The following command creates an unmirrored pool using files. While not recommen
.sp
.in +2
.nf
-\fB# zpool create tank /path/to/file/a /path/to/file/b\fR
+# \fBzpool create tank /path/to/file/a /path/to/file/b\fR
.fi
.in -2
.sp
@@ -1473,7 +1512,7 @@ The following command adds two mirrored disks to the pool "\fItank\fR", assuming
.sp
.in +2
.nf
-\fB# zpool add tank mirror c1t0d0 c1t1d0\fR
+# \fBzpool add tank mirror c1t0d0 c1t1d0\fR
.fi
.in -2
.sp
@@ -1491,7 +1530,7 @@ The results from this command are similar to the following:
.sp
.in +2
.nf
-\fB# zpool list\fR
+# \fBzpool list\fR
NAME SIZE USED AVAIL CAP HEALTH ALTROOT
pool 67.5G 2.92M 67.5G 0% ONLINE -
tank 67.5G 2.92M 67.5G 0% ONLINE -
@@ -1509,7 +1548,7 @@ The following command destroys the pool "\fItank\fR" and any datasets contained
.sp
.in +2
.nf
-\fB# zpool destroy -f tank\fR
+# \fBzpool destroy -f tank\fR
.fi
.in -2
.sp
@@ -1523,7 +1562,7 @@ The following command exports the devices in pool \fItank\fR so that they can be
.sp
.in +2
.nf
-\fB# zpool export tank\fR
+# \fBzpool export tank\fR
.fi
.in -2
.sp
@@ -1541,7 +1580,7 @@ The results from this command are similar to the following:
.sp
.in +2
.nf
-\fB# zpool import\fR
+# \fBzpool import\fR
pool: tank
id: 15451357997522795478
state: ONLINE
@@ -1553,7 +1592,7 @@ config:
c1t2d0 ONLINE
c1t3d0 ONLINE
-\fB# zpool import tank\fR
+# \fBzpool import tank\fR
.fi
.in -2
.sp
@@ -1567,7 +1606,7 @@ The following command upgrades all ZFS Storage pools to the current version of t
.sp
.in +2
.nf
-\fB# zpool upgrade -a\fR
+# \fBzpool upgrade -a\fR
This system is currently running ZFS version 2.
.fi
.in -2
@@ -1582,7 +1621,7 @@ The following command creates a new pool with an available hot spare:
.sp
.in +2
.nf
-\fB# zpool create tank mirror c0t0d0 c0t1d0 spare c0t2d0\fR
+# \fBzpool create tank mirror c0t0d0 c0t1d0 spare c0t2d0\fR
.fi
.in -2
.sp
@@ -1594,7 +1633,7 @@ If one of the disks were to fail, the pool would be reduced to the degraded stat
.sp
.in +2
.nf
-\fB# zpool replace tank c0t0d0 c0t3d0\fR
+# \fBzpool replace tank c0t0d0 c0t3d0\fR
.fi
.in -2
.sp
@@ -1606,7 +1645,7 @@ Once the data has been resilvered, the spare is automatically removed and is mad
.sp
.in +2
.nf
-\fB# zpool remove tank c0t2d0\fR
+# \fBzpool remove tank c0t2d0\fR
.fi
.in -2
.sp
@@ -1620,7 +1659,7 @@ The following command creates a ZFS storage pool consisting of two, two-way mirr
.sp
.in +2
.nf
-\fB# zpool create pool mirror c0d0 c1d0 mirror c2d0 c3d0 log mirror \e
+# \fBzpool create pool mirror c0d0 c1d0 mirror c2d0 c3d0 log mirror \e
c4d0 c5d0\fR
.fi
.in -2
@@ -1635,7 +1674,7 @@ The following command adds two disks for use as cache devices to a ZFS storage p
.sp
.in +2
.nf
-\fB# zpool add pool cache c2d0 c3d0\fR
+# \fBzpool add pool cache c2d0 c3d0\fR
.fi
.in -2
.sp
@@ -1643,10 +1682,57 @@ The following command adds two disks for use as cache devices to a ZFS storage p
.sp
.LP
Once added, the cache devices gradually fill with content from main memory. Depending on the size of your cache devices, it could take over an hour for them to fill. Capacity and reads can be monitored using the \fBiostat\fR option as follows:
+
+.sp
+.in +2
+.nf
+# \fBzpool iostat -v pool 5\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 14 \fRRemoving a Mirrored Log Device
+.sp
+.LP
+The following command removes the mirrored log device \fBmirror-2\fR.
+
+.sp
+.LP
+Given this configuration:
+
+.sp
+.in +2
+.nf
+ pool: tank
+ state: ONLINE
+ scrub: none requested
+config:
+
+ NAME STATE READ WRITE CKSUM
+ tank ONLINE 0 0 0
+ mirror-0 ONLINE 0 0 0
+ c6t0d0 ONLINE 0 0 0
+ c6t1d0 ONLINE 0 0 0
+ mirror-1 ONLINE 0 0 0
+ c6t2d0 ONLINE 0 0 0
+ c6t3d0 ONLINE 0 0 0
+ logs
+ mirror-2 ONLINE 0 0 0
+ c4t0d0 ONLINE 0 0 0
+ c4t1d0 ONLINE 0 0 0
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+The command to remove the mirrored log \fBmirror-2\fR is:
+
.sp
.in +2
.nf
-\fB# zpool iostat -v pool 5\fR
+# \fBzpool remove tank mirror-2\fR
.fi
.in -2
.sp
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
index 09cba89d7b9a..73e40ecc9b11 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <solaris.h>
@@ -44,7 +43,6 @@
#include <zone.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
-
#include <sys/stat.h>
#include <libzfs.h>
@@ -52,6 +50,8 @@
#include "zpool_util.h"
#include "zfs_comutil.h"
+#include "statcommon.h"
+
static int zpool_do_create(int, char **);
static int zpool_do_destroy(int, char **);
@@ -69,6 +69,7 @@ static int zpool_do_clear(int, char **);
static int zpool_do_attach(int, char **);
static int zpool_do_detach(int, char **);
static int zpool_do_replace(int, char **);
+static int zpool_do_split(int, char **);
static int zpool_do_scrub(int, char **);
@@ -121,7 +122,8 @@ typedef enum {
HELP_STATUS,
HELP_UPGRADE,
HELP_GET,
- HELP_SET
+ HELP_SET,
+ HELP_SPLIT
} zpool_help_t;
@@ -158,6 +160,7 @@ static zpool_command_t command_table[] = {
{ "attach", zpool_do_attach, HELP_ATTACH },
{ "detach", zpool_do_detach, HELP_DETACH },
{ "replace", zpool_do_replace, HELP_REPLACE },
+ { "split", zpool_do_split, HELP_SPLIT },
{ NULL },
{ "scrub", zpool_do_scrub, HELP_SCRUB },
{ NULL },
@@ -175,6 +178,8 @@ static zpool_command_t command_table[] = {
zpool_command_t *current_command;
static char history_str[HIS_MAX_RECORD_LEN];
+static uint_t timestamp_fmt = NODATE;
+
static const char *
get_usage(zpool_help_t idx) {
switch (idx) {
@@ -184,7 +189,7 @@ get_usage(zpool_help_t idx) {
return (gettext("\tattach [-f] <pool> <device> "
"<new-device>\n"));
case HELP_CLEAR:
- return (gettext("\tclear <pool> [device]\n"));
+ return (gettext("\tclear [-nF] <pool> [device]\n"));
case HELP_CREATE:
return (gettext("\tcreate [-fn] [-o property=value] ... \n"
"\t [-O file-system-property=value] ... \n"
@@ -199,17 +204,20 @@ get_usage(zpool_help_t idx) {
return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
+ "\timport [-d dir | -c cachefile] [-F [-n]] <pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n"
- "\t [-d dir | -c cachefile] [-D] [-f] [-R root] -a\n"
+ "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
+ "[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
- "\t [-d dir | -c cachefile] [-D] [-f] [-R root] "
- "<pool | id> [newpool]\n"));
+ "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
+ "[-R root] [-F [-n]]\n"
+ "\t <pool | id> [newpool]\n"));
case HELP_IOSTAT:
- return (gettext("\tiostat [-v] [pool] ... [interval "
+ return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval "
"[count]]\n"));
case HELP_LIST:
return (gettext("\tlist [-H] [-o property[,...]] "
- "[pool] ...\n"));
+ "[-T d|u] [pool] ... [interval [count]]\n"));
case HELP_OFFLINE:
return (gettext("\toffline [-t] <pool> <device> ...\n"));
case HELP_ONLINE:
@@ -222,7 +230,8 @@ get_usage(zpool_help_t idx) {
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
- return (gettext("\tstatus [-vx] [pool] ...\n"));
+ return (gettext("\tstatus [-vx] [-T d|u] [pool] ... [interval "
+ "[count]]\n"));
case HELP_UPGRADE:
return (gettext("\tupgrade\n"
"\tupgrade -v\n"
@@ -232,6 +241,10 @@ get_usage(zpool_help_t idx) {
"<pool> ...\n"));
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
+ case HELP_SPLIT:
+ return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n"
+ "\t [-o property=value] <pool> <newpool> "
+ "[<device> ...]\n"));
}
abort();
@@ -247,12 +260,12 @@ print_prop_cb(int prop, void *cb)
{
FILE *fp = cb;
- (void) fprintf(fp, "\t%-13s ", zpool_prop_to_name(prop));
+ (void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop));
if (zpool_prop_readonly(prop))
(void) fprintf(fp, " NO ");
else
- (void) fprintf(fp, " YES ");
+ (void) fprintf(fp, " YES ");
if (zpool_prop_values(prop) == NULL)
(void) fprintf(fp, "-\n");
@@ -299,7 +312,7 @@ usage(boolean_t requested)
(void) fprintf(fp,
gettext("\nthe following properties are supported:\n"));
- (void) fprintf(fp, "\n\t%-13s %s %s\n\n",
+ (void) fprintf(fp, "\n\t%-15s %s %s\n\n",
"PROPERTY", "EDIT", "VALUES");
/* Iterate over all properties */
@@ -341,7 +354,7 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent,
if ((is_log && !print_logs) || (!is_log && print_logs))
continue;
- vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
print_vdev_tree(zhp, vname, child[c], indent + 2,
B_FALSE);
free(vname);
@@ -509,11 +522,10 @@ zpool_do_add(int argc, char **argv)
}
/*
- * zpool remove <pool> <vdev> ...
+ * zpool remove <pool> <vdev> ...
*
- * Removes the given vdev from the pool. Currently, this only supports removing
- * spares and cache devices from the pool. Eventually, we'll want to support
- * removing leaf vdevs (as an alias for 'detach') as well as toplevel vdevs.
+ * Removes the given vdev from the pool. Currently, this supports removing
+ * spares, cache, and log devices from the pool.
*/
int
zpool_do_remove(int argc, char **argv)
@@ -942,7 +954,7 @@ zpool_do_export(int argc, char **argv)
static int
max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
{
- char *name = zpool_vdev_name(g_zfs, zhp, nv);
+ char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE);
nvlist_t **child;
uint_t c, children;
int ret;
@@ -1034,20 +1046,21 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
{
nvlist_t **child;
uint_t c, children;
+ pool_scan_stat_t *ps = NULL;
vdev_stat_t *vs;
- char rbuf[6], wbuf[6], cbuf[6], repaired[7];
+ char rbuf[6], wbuf[6], cbuf[6];
char *vname;
uint64_t notpresent;
spare_cbdata_t cb;
char *state;
- verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &c) == 0);
-
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
children = 0;
+ verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &c) == 0);
+
state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
if (isspare) {
/*
@@ -1125,31 +1138,43 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("bad intent log"));
break;
+ case VDEV_AUX_EXTERNAL:
+ (void) printf(gettext("external device fault"));
+ break;
+
+ case VDEV_AUX_SPLIT_POOL:
+ (void) printf(gettext("split into new pool"));
+ break;
+
default:
(void) printf(gettext("corrupted data"));
break;
}
- } else if (vs->vs_scrub_repaired != 0 && children == 0) {
- /*
- * Report bytes resilvered/repaired on leaf devices.
- */
- zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
- (void) printf(gettext(" %s %s"), repaired,
- (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
- "resilvered" : "repaired");
+ }
+
+ (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS,
+ (uint64_t **)&ps, &c);
+
+ if (ps && ps->pss_state == DSS_SCANNING &&
+ vs->vs_scan_processed != 0 && children == 0) {
+ (void) printf(gettext(" (%s)"),
+ (ps->pss_func == POOL_SCAN_RESILVER) ?
+ "resilvering" : "repairing");
}
(void) printf("\n");
for (c = 0; c < children; c++) {
- uint64_t is_log = B_FALSE;
+ uint64_t islog = B_FALSE, ishole = B_FALSE;
- /* Don't print logs here */
+ /* Don't print logs or holes here */
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
- &is_log);
- if (is_log)
+ &islog);
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+ &ishole);
+ if (islog || ishole)
continue;
- vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE);
print_status_config(zhp, vname, child[c],
namewidth, depth + 2, isspare);
free(vname);
@@ -1170,10 +1195,11 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
char *type, *vname;
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
- if (strcmp(type, VDEV_TYPE_MISSING) == 0)
+ if (strcmp(type, VDEV_TYPE_MISSING) == 0 ||
+ strcmp(type, VDEV_TYPE_HOLE) == 0)
return;
- verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
(void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
@@ -1222,7 +1248,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
if (is_log)
continue;
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c], B_TRUE);
print_import_config(vname, child[c], namewidth, depth + 2);
free(vname);
}
@@ -1231,7 +1257,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
&child, &children) == 0) {
(void) printf(gettext("\tcache\n"));
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE);
(void) printf("\t %s\n", vname);
free(vname);
}
@@ -1241,7 +1267,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
&child, &children) == 0) {
(void) printf(gettext("\tspares\n"));
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE);
(void) printf("\t %s\n", vname);
free(vname);
}
@@ -1276,7 +1302,7 @@ print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
&is_log);
if (!is_log)
continue;
- name = zpool_vdev_name(g_zfs, zhp, child[c]);
+ name = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE);
if (verbose)
print_status_config(zhp, name, child[c], namewidth,
2, B_FALSE);
@@ -1285,6 +1311,7 @@ print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
free(name);
}
}
+
/*
* Display the status for the given pool.
*/
@@ -1311,7 +1338,7 @@ show_import(nvlist_t *config)
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &vsc) == 0);
health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
@@ -1378,6 +1405,11 @@ show_import(nvlist_t *config)
"read.\n"));
break;
+ case ZPOOL_STATUS_RESILVERING:
+ (void) printf(gettext("status: One or more devices were being "
+ "resilvered.\n"));
+ break;
+
default:
/*
* No other status can be seen when importing pools.
@@ -1471,13 +1503,12 @@ show_import(nvlist_t *config)
*/
static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
- int force, nvlist_t *props, boolean_t do_verbatim)
+ nvlist_t *props, int flags)
{
zpool_handle_t *zhp;
char *name;
uint64_t state;
uint64_t version;
- int error = 0;
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
&name) == 0);
@@ -1490,7 +1521,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
(void) fprintf(stderr, gettext("cannot import '%s': pool "
"is formatted using a newer ZFS version\n"), name);
return (1);
- } else if (state != POOL_STATE_EXPORTED && !force) {
+ } else if (state != POOL_STATE_EXPORTED &&
+ !(flags & ZFS_IMPORT_ANY_HOST)) {
uint64_t hostid;
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
@@ -1524,7 +1556,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
}
- if (zpool_import_props(g_zfs, config, newname, props, do_verbatim) != 0)
+ if (zpool_import_props(g_zfs, config, newname, props, flags) != 0)
return (1);
if (newname != NULL)
@@ -1534,13 +1566,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
return (1);
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
+ !(flags & ZFS_IMPORT_ONLY) &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
zpool_close(zhp);
return (1);
}
zpool_close(zhp);
- return (error);
+ return (0);
}
/*
@@ -1548,7 +1581,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* import [-o mntopts] [-o prop=value] ... [-R root] [-D]
* [-d dir | -c cachefile] [-f] -a
* import [-o mntopts] [-o prop=value] ... [-R root] [-D]
- * [-d dir | -c cachefile] [-f] <pool | id> [newpool]
+ * [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
*
* -c Read pool information from a cachefile instead of searching
* devices.
@@ -1563,14 +1596,23 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* the given root. The pool will remain exported when the machine
* is rebooted.
*
- * -f Force import, even if it appears that the pool is active.
- *
- * -F Import even in the presence of faulted vdevs. This is an
+ * -V Import even in the presence of faulted vdevs. This is an
* intentionally undocumented option for testing purposes, and
* treats the pool configuration as complete, leaving any bad
* vdevs in the FAULTED state. In other words, it does verbatim
* import.
*
+ * -f Force import, even if it appears that the pool is active.
+ *
+ * -F Attempt rewind if necessary.
+ *
+ * -n See if rewind would work, but don't actually rewind.
+ *
+ * -N Import the pool but don't mount datasets.
+ *
+ * -T Specify a starting txg to use for import. This option is
+ * intentionally undocumented option for testing purposes.
+ *
* -a Import all pools found.
*
* -o Set property=value and/or temporary mount options (without '=').
@@ -1584,26 +1626,32 @@ zpool_do_import(int argc, char **argv)
char **searchdirs = NULL;
int nsearch = 0;
int c;
- int err;
+ int err = 0;
nvlist_t *pools = NULL;
boolean_t do_all = B_FALSE;
boolean_t do_destroyed = B_FALSE;
char *mntopts = NULL;
- boolean_t do_force = B_FALSE;
nvpair_t *elem;
nvlist_t *config;
uint64_t searchguid = 0;
char *searchname = NULL;
char *propval;
nvlist_t *found_config;
+ nvlist_t *policy = NULL;
nvlist_t *props = NULL;
boolean_t first;
- boolean_t do_verbatim = B_FALSE;
- uint64_t pool_state;
+ int flags = ZFS_IMPORT_NORMAL;
+ uint32_t rewind_policy = ZPOOL_NO_REWIND;
+ boolean_t dryrun = B_FALSE;
+ boolean_t do_rewind = B_FALSE;
+ boolean_t xtreme_rewind = B_FALSE;
+ uint64_t pool_state, txg = -1ULL;
char *cachefile = NULL;
+ importargs_t idata = { 0 };
+ char *endptr;
/* check options */
- while ((c = getopt(argc, argv, ":ac:d:DfFo:p:R:")) != -1) {
+ while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
@@ -1628,10 +1676,19 @@ zpool_do_import(int argc, char **argv)
do_destroyed = B_TRUE;
break;
case 'f':
- do_force = B_TRUE;
+ flags |= ZFS_IMPORT_ANY_HOST;
break;
case 'F':
- do_verbatim = B_TRUE;
+ do_rewind = B_TRUE;
+ break;
+ case 'm':
+ flags |= ZFS_IMPORT_MISSING_LOG;
+ break;
+ case 'n':
+ dryrun = B_TRUE;
+ break;
+ case 'N':
+ flags |= ZFS_IMPORT_ONLY;
break;
case 'o':
if ((propval = strchr(optarg, '=')) != NULL) {
@@ -1656,6 +1713,22 @@ zpool_do_import(int argc, char **argv)
ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
goto error;
break;
+ case 'T':
+ errno = 0;
+ txg = strtoull(optarg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0') {
+ (void) fprintf(stderr,
+ gettext("invalid txg value\n"));
+ usage(B_FALSE);
+ }
+ rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND;
+ break;
+ case 'V':
+ flags |= ZFS_IMPORT_VERBATIM;
+ break;
+ case 'X':
+ xtreme_rewind = B_TRUE;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@@ -1676,6 +1749,24 @@ zpool_do_import(int argc, char **argv)
usage(B_FALSE);
}
+ if ((dryrun || xtreme_rewind) && !do_rewind) {
+ (void) fprintf(stderr,
+ gettext("-n or -X only meaningful with -F\n"));
+ usage(B_FALSE);
+ }
+ if (dryrun)
+ rewind_policy = ZPOOL_TRY_REWIND;
+ else if (do_rewind)
+ rewind_policy = ZPOOL_DO_REWIND;
+ if (xtreme_rewind)
+ rewind_policy |= ZPOOL_EXTREME_REWIND;
+
+ /* In the future, we can capture further policy and include it here */
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, txg) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0)
+ goto error;
+
if (searchdirs == NULL) {
searchdirs = safe_malloc(sizeof (char *));
searchdirs[0] = "/dev/dsk";
@@ -1703,6 +1794,7 @@ zpool_do_import(int argc, char **argv)
(void) fprintf(stderr, gettext("cannot "
"discover pools: permission denied\n"));
free(searchdirs);
+ nvlist_free(policy);
return (1);
}
}
@@ -1728,28 +1820,49 @@ zpool_do_import(int argc, char **argv)
if (errno != 0 || *endptr != '\0')
searchname = argv[0];
found_config = NULL;
- }
- if (cachefile) {
- pools = zpool_find_import_cached(g_zfs, cachefile, searchname,
- searchguid);
- } else if (searchname != NULL) {
- pools = zpool_find_import_byname(g_zfs, nsearch, searchdirs,
- searchname);
- } else {
/*
- * It's OK to search by guid even if searchguid is 0.
+ * User specified a name or guid. Ensure it's unique.
*/
- pools = zpool_find_import_byguid(g_zfs, nsearch, searchdirs,
- searchguid);
- }
-
- if (pools == NULL) {
+ idata.unique = B_TRUE;
+ }
+
+
+ idata.path = searchdirs;
+ idata.paths = nsearch;
+ idata.poolname = searchname;
+ idata.guid = searchguid;
+ idata.cachefile = cachefile;
+
+ pools = zpool_search_import(g_zfs, &idata);
+
+ if (pools != NULL && idata.exists &&
+ (argc == 1 || strcmp(argv[0], argv[1]) == 0)) {
+ (void) fprintf(stderr, gettext("cannot import '%s': "
+ "a pool with that name already exists\n"),
+ argv[0]);
+ (void) fprintf(stderr, gettext("use the form '%s "
+ "<pool | id> <newpool>' to give it a new name\n"),
+ "zpool import");
+ err = 1;
+ } else if (pools == NULL && idata.exists) {
+ (void) fprintf(stderr, gettext("cannot import '%s': "
+ "a pool with that name is already created/imported,\n"),
+ argv[0]);
+ (void) fprintf(stderr, gettext("and no additional pools "
+ "with that name were found\n"));
+ err = 1;
+ } else if (pools == NULL) {
if (argc != 0) {
(void) fprintf(stderr, gettext("cannot import '%s': "
"no such pool available\n"), argv[0]);
}
+ err = 1;
+ }
+
+ if (err == 1) {
free(searchdirs);
+ nvlist_free(policy);
return (1);
}
@@ -1773,17 +1886,21 @@ zpool_do_import(int argc, char **argv)
if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
continue;
+ verify(nvlist_add_nvlist(config, ZPOOL_REWIND_POLICY,
+ policy) == 0);
+
if (argc == 0) {
if (first)
first = B_FALSE;
else if (!do_all)
(void) printf("\n");
- if (do_all)
+ if (do_all) {
err |= do_import(config, NULL, mntopts,
- do_force, props, do_verbatim);
- else
+ props, flags);
+ } else {
show_import(config);
+ }
} else if (searchname != NULL) {
char *name;
@@ -1829,7 +1946,7 @@ zpool_do_import(int argc, char **argv)
err = B_TRUE;
} else {
err |= do_import(found_config, argc == 1 ? NULL :
- argv[1], mntopts, do_force, props, do_verbatim);
+ argv[1], mntopts, props, flags);
}
}
@@ -1844,6 +1961,7 @@ zpool_do_import(int argc, char **argv)
error:
nvlist_free(props);
nvlist_free(pools);
+ nvlist_free(policy);
free(searchdirs);
return (err ? 1 : 0);
@@ -1871,7 +1989,7 @@ print_iostat_header(iostat_cbdata_t *cb)
{
(void) printf("%*s capacity operations bandwidth\n",
cb->cb_namewidth, "");
- (void) printf("%-*s used avail read write read write\n",
+ (void) printf("%-*s alloc free read write read write\n",
cb->cb_namewidth, "pool");
print_iostat_separator(cb);
}
@@ -1906,13 +2024,13 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
char *vname;
if (oldnv != NULL) {
- verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&oldvs, &c) == 0);
+ verify(nvlist_lookup_uint64_array(oldnv,
+ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0);
} else {
oldvs = &zerovs;
}
- verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&newvs, &c) == 0);
if (strlen(name) + depth > cb->cb_namewidth)
@@ -1962,7 +2080,13 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
return;
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+ uint64_t ishole = B_FALSE;
+
+ if (nvlist_lookup_uint64(newchild[c],
+ ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole)
+ continue;
+
+ vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE);
print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
newchild[c], cb, depth + 2);
free(vname);
@@ -1983,7 +2107,8 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
(void) printf("%-*s - - - - - "
"-\n", cb->cb_namewidth, "cache");
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
+ B_FALSE);
print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
newchild[c], cb, depth + 2);
free(vname);
@@ -2072,42 +2197,14 @@ get_namewidth(zpool_handle_t *zhp, void *data)
}
/*
- * zpool iostat [-v] [pool] ... [interval [count]]
- *
- * -v Display statistics for individual vdevs
- *
- * This command can be tricky because we want to be able to deal with pool
- * creation/destruction as well as vdev configuration changes. The bulk of this
- * processing is handled by the pool_list_* routines in zpool_iter.c. We rely
- * on pool_list_update() to detect the addition of new pools. Configuration
- * changes are all handled within libzfs.
+ * Parse the input string, get the 'interval' and 'count' value if there is one.
*/
-int
-zpool_do_iostat(int argc, char **argv)
+static void
+get_interval_count(int *argcp, char **argv, unsigned long *iv,
+ unsigned long *cnt)
{
- int c;
- int ret;
- int npools;
unsigned long interval = 0, count = 0;
- zpool_list_t *list;
- boolean_t verbose = B_FALSE;
- iostat_cbdata_t cb;
-
- /* check options */
- while ((c = getopt(argc, argv, "v")) != -1) {
- switch (c) {
- case 'v':
- verbose = B_TRUE;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- argc -= optind;
- argv += optind;
+ int argc = *argcp, errno;
/*
* Determine if the last argument is an integer or a pool name
@@ -2124,7 +2221,6 @@ zpool_do_iostat(int argc, char **argv)
"cannot be zero\n"));
usage(B_FALSE);
}
-
/*
* Ignore the last parameter
*/
@@ -2141,7 +2237,7 @@ zpool_do_iostat(int argc, char **argv)
/*
* If the last argument is also an integer, then we have both a count
- * and an integer.
+ * and an interval.
*/
if (argc > 0 && isdigit(argv[argc - 1][0])) {
char *end;
@@ -2166,6 +2262,66 @@ zpool_do_iostat(int argc, char **argv)
}
}
+ *iv = interval;
+ *cnt = count;
+ *argcp = argc;
+}
+
+static void
+get_timestamp_arg(char c)
+{
+ if (c == 'u')
+ timestamp_fmt = UDATE;
+ else if (c == 'd')
+ timestamp_fmt = DDATE;
+ else
+ usage(B_FALSE);
+}
+
+/*
+ * zpool iostat [-v] [-T d|u] [pool] ... [interval [count]]
+ *
+ * -v Display statistics for individual vdevs
+ * -T Display a timestamp in date(1) or Unix format
+ *
+ * This command can be tricky because we want to be able to deal with pool
+ * creation/destruction as well as vdev configuration changes. The bulk of this
+ * processing is handled by the pool_list_* routines in zpool_iter.c. We rely
+ * on pool_list_update() to detect the addition of new pools. Configuration
+ * changes are all handled within libzfs.
+ */
+int
+zpool_do_iostat(int argc, char **argv)
+{
+ int c;
+ int ret;
+ int npools;
+ unsigned long interval = 0, count = 0;
+ zpool_list_t *list;
+ boolean_t verbose = B_FALSE;
+ iostat_cbdata_t cb;
+
+ /* check options */
+ while ((c = getopt(argc, argv, "T:v")) != -1) {
+ switch (c) {
+ case 'T':
+ get_timestamp_arg(*optarg);
+ break;
+ case 'v':
+ verbose = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ get_interval_count(&argc, argv, &interval, &count);
+
/*
* Construct the list of all interesting pools.
*/
@@ -2212,6 +2368,9 @@ zpool_do_iostat(int argc, char **argv)
cb.cb_namewidth = 0;
(void) pool_list_iter(list, B_FALSE, get_namewidth, &cb);
+ if (timestamp_fmt != NODATE)
+ print_timestamp(timestamp_fmt);
+
/*
* If it's the first time, or verbose mode, print the header.
*/
@@ -2363,12 +2522,13 @@ list_callback(zpool_handle_t *zhp, void *data)
}
/*
- * zpool list [-H] [-o prop[,prop]*] [pool] ...
+ * zpool list [-H] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]]
*
* -H Scripted mode. Don't display headers, and separate properties
* by a single tab.
* -o List of properties to display. Defaults to
- * "name,size,used,available,capacity,health,altroot"
+ * "name,size,allocated,free,capacity,health,altroot"
+ * -T Display a timestamp in date(1) or Unix format
*
* List all pools in the system, whether or not they're healthy. Output space
* statistics for each one, as well as health status summary.
@@ -2380,11 +2540,12 @@ zpool_do_list(int argc, char **argv)
int ret;
list_cbdata_t cb = { 0 };
static char default_props[] =
- "name,size,used,available,capacity,health,altroot";
+ "name,size,allocated,free,capacity,dedupratio,health,altroot";
char *props = default_props;
+ unsigned long interval = 0, count = 0;
/* check options */
- while ((c = getopt(argc, argv, ":Ho:")) != -1) {
+ while ((c = getopt(argc, argv, ":Ho:T:")) != -1) {
switch (c) {
case 'H':
cb.cb_scripted = B_TRUE;
@@ -2392,6 +2553,9 @@ zpool_do_list(int argc, char **argv)
case 'o':
props = optarg;
break;
+ case 'T':
+ get_timestamp_arg(*optarg);
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@@ -2407,21 +2571,37 @@ zpool_do_list(int argc, char **argv)
argc -= optind;
argv += optind;
+ get_interval_count(&argc, argv, &interval, &count);
+
if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0)
usage(B_FALSE);
cb.cb_first = B_TRUE;
- ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
- list_callback, &cb);
+ for (;;) {
- zprop_free_list(cb.cb_proplist);
+ if (timestamp_fmt != NODATE)
+ print_timestamp(timestamp_fmt);
- if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
- (void) printf(gettext("no pools available\n"));
- return (0);
+ ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
+ list_callback, &cb);
+
+ if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
+ (void) printf(gettext("no pools available\n"));
+ zprop_free_list(cb.cb_proplist);
+ return (0);
+ }
+
+ if (interval == 0)
+ break;
+
+ if (count != 0 && --count == 0)
+ break;
+
+ (void) sleep(interval);
}
+ zprop_free_list(cb.cb_proplist);
return (ret);
}
@@ -2436,10 +2616,10 @@ zpool_get_vdev_by_name(nvlist_t *nv, char *name)
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
- if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV)-1) == 0)
- name += sizeof(_PATH_DEV)-1;
- if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV)-1) == 0)
- path += sizeof(_PATH_DEV)-1;
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ name += sizeof(_PATH_DEV) - 1;
+ if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ path += sizeof(_PATH_DEV) - 1;
if (strcmp(name, path) == 0)
return (nv);
return (NULL);
@@ -2628,6 +2808,146 @@ zpool_do_detach(int argc, char **argv)
}
/*
+ * zpool split [-n] [-o prop=val] ...
+ * [-o mntopt] ...
+ * [-R altroot] <pool> <newpool> [<device> ...]
+ *
+ * -n Do not split the pool, but display the resulting layout if
+ * it were to be split.
+ * -o Set property=value, or set mount options.
+ * -R Mount the split-off pool under an alternate root.
+ *
+ * Splits the named pool and gives it the new pool name. Devices to be split
+ * off may be listed, provided that no more than one device is specified
+ * per top-level vdev mirror. The newly split pool is left in an exported
+ * state unless -R is specified.
+ *
+ * Restrictions: the top-level of the pool pool must only be made up of
+ * mirrors; all devices in the pool must be healthy; no device may be
+ * undergoing a resilvering operation.
+ */
+int
+zpool_do_split(int argc, char **argv)
+{
+ char *srcpool, *newpool, *propval;
+ char *mntopts = NULL;
+ splitflags_t flags;
+ int c, ret = 0;
+ zpool_handle_t *zhp;
+ nvlist_t *config, *props = NULL;
+
+ flags.dryrun = B_FALSE;
+ flags.import = B_FALSE;
+
+ /* check options */
+ while ((c = getopt(argc, argv, ":R:no:")) != -1) {
+ switch (c) {
+ case 'R':
+ flags.import = B_TRUE;
+ if (add_prop_list(
+ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg,
+ &props, B_TRUE) != 0) {
+ if (props)
+ nvlist_free(props);
+ usage(B_FALSE);
+ }
+ break;
+ case 'n':
+ flags.dryrun = B_TRUE;
+ break;
+ case 'o':
+ if ((propval = strchr(optarg, '=')) != NULL) {
+ *propval = '\0';
+ propval++;
+ if (add_prop_list(optarg, propval,
+ &props, B_TRUE) != 0) {
+ if (props)
+ nvlist_free(props);
+ usage(B_FALSE);
+ }
+ } else {
+ mntopts = optarg;
+ }
+ break;
+ case ':':
+ (void) fprintf(stderr, gettext("missing argument for "
+ "'%c' option\n"), optopt);
+ usage(B_FALSE);
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ break;
+ }
+ }
+
+ if (!flags.import && mntopts != NULL) {
+ (void) fprintf(stderr, gettext("setting mntopts is only "
+ "valid when importing the pool\n"));
+ usage(B_FALSE);
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("Missing pool name\n"));
+ usage(B_FALSE);
+ }
+ if (argc < 2) {
+ (void) fprintf(stderr, gettext("Missing new pool name\n"));
+ usage(B_FALSE);
+ }
+
+ srcpool = argv[0];
+ newpool = argv[1];
+
+ argc -= 2;
+ argv += 2;
+
+ if ((zhp = zpool_open(g_zfs, srcpool)) == NULL)
+ return (1);
+
+ config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv);
+ if (config == NULL) {
+ ret = 1;
+ } else {
+ if (flags.dryrun) {
+ (void) printf(gettext("would create '%s' with the "
+ "following layout:\n\n"), newpool);
+ print_vdev_tree(NULL, newpool, config, 0, B_FALSE);
+ }
+ nvlist_free(config);
+ }
+
+ zpool_close(zhp);
+
+ if (ret != 0 || flags.dryrun || !flags.import)
+ return (ret);
+
+ /*
+ * The split was successful. Now we need to open the new
+ * pool and import it.
+ */
+ if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL)
+ return (1);
+ if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
+ zpool_enable_datasets(zhp, mntopts, 0) != 0) {
+ ret = 1;
+ (void) fprintf(stderr, gettext("Split was succssful, but "
+ "the datasets could not all be mounted\n"));
+ (void) fprintf(stderr, gettext("Try doing '%s' with a "
+ "different altroot\n"), "zpool import");
+ }
+ zpool_close(zhp);
+
+ return (ret);
+}
+
+
+
+/*
* zpool online <pool> <device> ...
*/
int
@@ -2638,10 +2958,14 @@ zpool_do_online(int argc, char **argv)
zpool_handle_t *zhp;
int ret = 0;
vdev_state_t newstate;
+ int flags = 0;
/* check options */
- while ((c = getopt(argc, argv, "t")) != -1) {
+ while ((c = getopt(argc, argv, "et")) != -1) {
switch (c) {
+ case 'e':
+ flags |= ZFS_ONLINE_EXPAND;
+ break;
case 't':
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2669,7 +2993,7 @@ zpool_do_online(int argc, char **argv)
return (1);
for (i = 1; i < argc; i++) {
- if (zpool_vdev_online(zhp, argv[i], 0, &newstate) == 0) {
+ if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
if (newstate != VDEV_STATE_HEALTHY) {
(void) printf(gettext("warning: device '%s' "
"onlined, but remains in faulted state\n"),
@@ -2763,31 +3087,80 @@ zpool_do_offline(int argc, char **argv)
int
zpool_do_clear(int argc, char **argv)
{
+ int c;
int ret = 0;
+ boolean_t dryrun = B_FALSE;
+ boolean_t do_rewind = B_FALSE;
+ boolean_t xtreme_rewind = B_FALSE;
+ uint32_t rewind_policy = ZPOOL_NO_REWIND;
+ nvlist_t *policy = NULL;
zpool_handle_t *zhp;
char *pool, *device;
- if (argc < 2) {
+ /* check options */
+ while ((c = getopt(argc, argv, "FnX")) != -1) {
+ switch (c) {
+ case 'F':
+ do_rewind = B_TRUE;
+ break;
+ case 'n':
+ dryrun = B_TRUE;
+ break;
+ case 'X':
+ xtreme_rewind = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name\n"));
usage(B_FALSE);
}
- if (argc > 3) {
+ if (argc > 2) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
- pool = argv[1];
- device = argc == 3 ? argv[2] : NULL;
+ if ((dryrun || xtreme_rewind) && !do_rewind) {
+ (void) fprintf(stderr,
+ gettext("-n or -X only meaningful with -F\n"));
+ usage(B_FALSE);
+ }
+ if (dryrun)
+ rewind_policy = ZPOOL_TRY_REWIND;
+ else if (do_rewind)
+ rewind_policy = ZPOOL_DO_REWIND;
+ if (xtreme_rewind)
+ rewind_policy |= ZPOOL_EXTREME_REWIND;
+
+ /* In future, further rewind policy choices can be passed along here */
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0)
+ return (1);
+
+ pool = argv[0];
+ device = argc == 2 ? argv[1] : NULL;
- if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
+ if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) {
+ nvlist_free(policy);
return (1);
+ }
- if (zpool_clear(zhp, device) != 0)
+ if (zpool_clear(zhp, device, policy) != 0)
ret = 1;
zpool_close(zhp);
+ nvlist_free(policy);
+
return (ret);
}
@@ -2812,7 +3185,7 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (1);
}
- err = zpool_scrub(zhp, cb->cb_type);
+ err = zpool_scan(zhp, cb->cb_type);
return (err != 0);
}
@@ -2828,13 +3201,13 @@ zpool_do_scrub(int argc, char **argv)
int c;
scrub_cbdata_t cb;
- cb.cb_type = POOL_SCRUB_EVERYTHING;
+ cb.cb_type = POOL_SCAN_SCRUB;
/* check options */
while ((c = getopt(argc, argv, "s")) != -1) {
switch (c) {
case 's':
- cb.cb_type = POOL_SCRUB_NONE;
+ cb.cb_type = POOL_SCAN_NONE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2862,68 +3235,119 @@ typedef struct status_cbdata {
boolean_t cb_verbose;
boolean_t cb_explain;
boolean_t cb_first;
+ boolean_t cb_dedup_stats;
} status_cbdata_t;
/*
* Print out detailed scrub status.
*/
void
-print_scrub_status(nvlist_t *nvroot)
+print_scan_status(pool_scan_stat_t *ps)
{
- vdev_stat_t *vs;
- uint_t vsc;
- time_t start, end, now;
+ time_t start, end;
+ uint64_t elapsed, mins_left, hours_left;
+ uint64_t pass_exam, examined, total;
+ uint_t rate;
double fraction_done;
- uint64_t examined, total, minutes_left, minutes_taken;
- char *scrub_type;
+ char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7];
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &vsc) == 0);
+ (void) printf(gettext(" scan: "));
- /*
- * If there's never been a scrub, there's not much to say.
- */
- if (vs->vs_scrub_end == 0 && vs->vs_scrub_type == POOL_SCRUB_NONE) {
+ /* If there's never been a scan, there's not much to say. */
+ if (ps == NULL || ps->pss_func == POOL_SCAN_NONE ||
+ ps->pss_func >= POOL_SCAN_FUNCS) {
(void) printf(gettext("none requested\n"));
return;
}
- scrub_type = (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
- "resilver" : "scrub";
+ start = ps->pss_start_time;
+ end = ps->pss_end_time;
+ zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf));
- start = vs->vs_scrub_start;
- end = vs->vs_scrub_end;
- now = time(NULL);
- examined = vs->vs_scrub_examined;
- total = vs->vs_alloc;
-
- if (end != 0) {
- minutes_taken = (uint64_t)((end - start) / 60);
-
- (void) printf(gettext("%s %s after %lluh%um with %llu errors "
- "on %s"),
- scrub_type, vs->vs_scrub_complete ? "completed" : "stopped",
+ assert(ps->pss_func == POOL_SCAN_SCRUB ||
+ ps->pss_func == POOL_SCAN_RESILVER);
+ /*
+ * Scan is finished or canceled.
+ */
+ if (ps->pss_state == DSS_FINISHED) {
+ uint64_t minutes_taken = (end - start) / 60;
+ char *fmt;
+
+ if (ps->pss_func == POOL_SCAN_SCRUB) {
+ fmt = gettext("scrub repaired %s in %lluh%um with "
+ "%llu errors on %s");
+ } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ fmt = gettext("resilvered %s in %lluh%um with "
+ "%llu errors on %s");
+ }
+ /* LINTED */
+ (void) printf(fmt, processed_buf,
(u_longlong_t)(minutes_taken / 60),
(uint_t)(minutes_taken % 60),
- (u_longlong_t)vs->vs_scrub_errors, ctime(&end));
+ (u_longlong_t)ps->pss_errors,
+ ctime((time_t *)&end));
+ return;
+ } else if (ps->pss_state == DSS_CANCELED) {
+ if (ps->pss_func == POOL_SCAN_SCRUB) {
+ (void) printf(gettext("scrub canceled on %s"),
+ ctime(&end));
+ } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ (void) printf(gettext("resilver canceled on %s"),
+ ctime(&end));
+ }
return;
}
- if (examined == 0)
- examined = 1;
- if (examined > total)
- total = examined;
+ assert(ps->pss_state == DSS_SCANNING);
+
+ /*
+ * Scan is in progress.
+ */
+ if (ps->pss_func == POOL_SCAN_SCRUB) {
+ (void) printf(gettext("scrub in progress since %s"),
+ ctime(&start));
+ } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ (void) printf(gettext("resilver in progress since %s"),
+ ctime(&start));
+ }
+ examined = ps->pss_examined ? ps->pss_examined : 1;
+ total = ps->pss_to_examine;
fraction_done = (double)examined / total;
- minutes_left = (uint64_t)((now - start) *
- (1 - fraction_done) / fraction_done / 60);
- minutes_taken = (uint64_t)((now - start) / 60);
- (void) printf(gettext("%s in progress for %lluh%um, %.2f%% done, "
- "%lluh%um to go\n"),
- scrub_type, (u_longlong_t)(minutes_taken / 60),
- (uint_t)(minutes_taken % 60), 100 * fraction_done,
- (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
+ /* elapsed time for this pass */
+ elapsed = time(NULL) - ps->pss_pass_start;
+ elapsed = elapsed ? elapsed : 1;
+ pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
+ rate = pass_exam / elapsed;
+ rate = rate ? rate : 1;
+ mins_left = ((total - examined) / rate) / 60;
+ hours_left = mins_left / 60;
+
+ zfs_nicenum(examined, examined_buf, sizeof (examined_buf));
+ zfs_nicenum(total, total_buf, sizeof (total_buf));
+ zfs_nicenum(rate, rate_buf, sizeof (rate_buf));
+
+ /*
+ * do not print estimated time if hours_left is more than 30 days
+ */
+ (void) printf(gettext(" %s scanned out of %s at %s/s"),
+ examined_buf, total_buf, rate_buf);
+ if (hours_left < (30 * 24)) {
+ (void) printf(gettext(", %lluh%um to go\n"),
+ (u_longlong_t)hours_left, (uint_t)(mins_left % 60));
+ } else {
+ (void) printf(gettext(
+ ", (scan is slow, no estimated time)\n"));
+ }
+
+ if (ps->pss_func == POOL_SCAN_RESILVER) {
+ (void) printf(gettext(" %s resilvered, %.2f%% done\n"),
+ processed_buf, 100 * fraction_done);
+ } else if (ps->pss_func == POOL_SCAN_SCRUB) {
+ (void) printf(gettext(" %s repaired, %.2f%% done\n"),
+ processed_buf, 100 * fraction_done);
+ }
}
static void
@@ -2974,7 +3398,7 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
(void) printf(gettext("\tspares\n"));
for (i = 0; i < nspares; i++) {
- name = zpool_vdev_name(g_zfs, zhp, spares[i]);
+ name = zpool_vdev_name(g_zfs, zhp, spares[i], B_FALSE);
print_status_config(zhp, name, spares[i],
namewidth, 2, B_TRUE);
free(name);
@@ -2994,13 +3418,43 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
(void) printf(gettext("\tcache\n"));
for (i = 0; i < nl2cache; i++) {
- name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
+ name = zpool_vdev_name(g_zfs, zhp, l2cache[i], B_FALSE);
print_status_config(zhp, name, l2cache[i],
namewidth, 2, B_FALSE);
free(name);
}
}
+static void
+print_dedup_stats(nvlist_t *config)
+{
+ ddt_histogram_t *ddh;
+ ddt_stat_t *dds;
+ ddt_object_t *ddo;
+ uint_t c;
+
+ /*
+ * If the pool was faulted then we may not have been able to
+ * obtain the config. Otherwise, if have anything in the dedup
+ * table continue processing the stats.
+ */
+ if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS,
+ (uint64_t **)&ddo, &c) != 0 || ddo->ddo_count == 0)
+ return;
+
+ (void) printf("\n");
+ (void) printf("DDT entries %llu, size %llu on disk, %llu in core\n",
+ (u_longlong_t)ddo->ddo_count,
+ (u_longlong_t)ddo->ddo_dspace,
+ (u_longlong_t)ddo->ddo_mspace);
+
+ verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
+ (uint64_t **)&dds, &c) == 0);
+ verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM,
+ (uint64_t **)&ddh, &c) == 0);
+ zpool_dump_ddt(dds, ddh);
+}
+
/*
* Display a summary of pool status. Displays a summary such as:
*
@@ -3053,7 +3507,7 @@ status_callback(zpool_handle_t *zhp, void *data)
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
@@ -3091,8 +3545,8 @@ status_callback(zpool_handle_t *zhp, void *data)
"be used because the label is missing \n\tor invalid. "
"There are insufficient replicas for the pool to "
"continue\n\tfunctioning.\n"));
- (void) printf(gettext("action: Destroy and re-create the pool "
- "from a backup source.\n"));
+ zpool_explain_recover(zpool_get_handle(zhp),
+ zpool_get_name(zhp), reason, config);
break;
case ZPOOL_STATUS_FAILING_DEV:
@@ -3116,6 +3570,16 @@ status_callback(zpool_handle_t *zhp, void *data)
"replace'.\n"));
break;
+ case ZPOOL_STATUS_REMOVED_DEV:
+ (void) printf(gettext("status: One or more devices has "
+ "been removed by the administrator.\n\tSufficient "
+ "replicas exist for the pool to continue functioning in "
+ "a\n\tdegraded state.\n"));
+ (void) printf(gettext("action: Online the device using "
+ "'zpool online' or replace the device with\n\t'zpool "
+ "replace'.\n"));
+ break;
+
case ZPOOL_STATUS_RESILVERING:
(void) printf(gettext("status: One or more devices is "
"currently being resilvered. The pool will\n\tcontinue "
@@ -3136,8 +3600,8 @@ status_callback(zpool_handle_t *zhp, void *data)
case ZPOOL_STATUS_CORRUPT_POOL:
(void) printf(gettext("status: The pool metadata is corrupted "
"and the pool cannot be opened.\n"));
- (void) printf(gettext("action: Destroy and re-create the pool "
- "from a backup source.\n"));
+ zpool_explain_recover(zpool_get_handle(zhp),
+ zpool_get_name(zhp), reason, config);
break;
case ZPOOL_STATUS_VERSION_OLDER:
@@ -3213,10 +3677,11 @@ status_callback(zpool_handle_t *zhp, void *data)
uint64_t nerr;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
+ pool_scan_stat_t *ps = NULL;
-
- (void) printf(gettext(" scrub: "));
- print_scrub_status(nvroot);
+ (void) nvlist_lookup_uint64_array(nvroot,
+ ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c);
+ print_scan_status(ps);
namewidth = max_width(zhp, nvroot, 0, 0);
if (namewidth < 10)
@@ -3272,6 +3737,9 @@ status_callback(zpool_handle_t *zhp, void *data)
else
print_error_log(zhp);
}
+
+ if (cbp->cb_dedup_stats)
+ print_dedup_stats(config);
} else {
(void) printf(gettext("config: The configuration cannot be "
"determined.\n"));
@@ -3281,10 +3749,12 @@ status_callback(zpool_handle_t *zhp, void *data)
}
/*
- * zpool status [-vx] [pool] ...
+ * zpool status [-vx] [-T d|u] [pool] ... [interval [count]]
*
* -v Display complete error logs
* -x Display only pools with potential problems
+ * -D Display dedup status (undocumented)
+ * -T Display a timestamp in date(1) or Unix format
*
* Describes the health status of all pools or some subset.
*/
@@ -3293,10 +3763,11 @@ zpool_do_status(int argc, char **argv)
{
int c;
int ret;
+ unsigned long interval = 0, count = 0;
status_cbdata_t cb = { 0 };
/* check options */
- while ((c = getopt(argc, argv, "vx")) != -1) {
+ while ((c = getopt(argc, argv, "vxDT:")) != -1) {
switch (c) {
case 'v':
cb.cb_verbose = B_TRUE;
@@ -3304,6 +3775,12 @@ zpool_do_status(int argc, char **argv)
case 'x':
cb.cb_explain = B_TRUE;
break;
+ case 'D':
+ cb.cb_dedup_stats = B_TRUE;
+ break;
+ case 'T':
+ get_timestamp_arg(*optarg);
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -3314,19 +3791,38 @@ zpool_do_status(int argc, char **argv)
argc -= optind;
argv += optind;
- cb.cb_first = B_TRUE;
+ get_interval_count(&argc, argv, &interval, &count);
if (argc == 0)
cb.cb_allpools = B_TRUE;
- ret = for_each_pool(argc, argv, B_TRUE, NULL, status_callback, &cb);
+ cb.cb_first = B_TRUE;
- if (argc == 0 && cb.cb_count == 0)
- (void) printf(gettext("no pools available\n"));
- else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
- (void) printf(gettext("all pools are healthy\n"));
+ for (;;) {
+ if (timestamp_fmt != NODATE)
+ print_timestamp(timestamp_fmt);
- return (ret);
+ ret = for_each_pool(argc, argv, B_TRUE, NULL,
+ status_callback, &cb);
+
+ if (argc == 0 && cb.cb_count == 0)
+ (void) printf(gettext("no pools available\n"));
+ else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
+ (void) printf(gettext("all pools are healthy\n"));
+
+ if (ret != 0)
+ return (ret);
+
+ if (interval == 0)
+ break;
+
+ if (count != 0 && --count == 0)
+ break;
+
+ (void) sleep(interval);
+ }
+
+ return (0);
}
typedef struct upgrade_cbdata {
@@ -3489,7 +3985,7 @@ zpool_do_upgrade(int argc, char **argv)
/* check options */
- while ((c = getopt(argc, argv, "avV:")) != -1) {
+ while ((c = getopt(argc, argv, ":avV:")) != -1) {
switch (c) {
case 'a':
cb.cb_all = B_TRUE;
@@ -3506,6 +4002,11 @@ zpool_do_upgrade(int argc, char **argv)
usage(B_FALSE);
}
break;
+ case ':':
+ (void) fprintf(stderr, gettext("missing argument for "
+ "'%c' option\n"), optopt);
+ usage(B_FALSE);
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -3568,11 +4069,25 @@ zpool_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 13 snapused property\n"));
(void) printf(gettext(" 14 passthrough-x aclinherit\n"));
(void) printf(gettext(" 15 user/group space accounting\n"));
- (void) printf(gettext("For more information on a particular "
- "version, including supported releases, see:\n\n"));
- (void) printf("http://www.opensolaris.org/os/community/zfs/"
- "version/N\n\n");
- (void) printf(gettext("Where 'N' is the version number.\n"));
+ (void) printf(gettext(" 16 stmf property support\n"));
+ (void) printf(gettext(" 17 Triple-parity RAID-Z\n"));
+ (void) printf(gettext(" 18 Snapshot user holds\n"));
+ (void) printf(gettext(" 19 Log device removal\n"));
+ (void) printf(gettext(" 20 Compression using zle "
+ "(zero-length encoding)\n"));
+ (void) printf(gettext(" 21 Deduplication\n"));
+ (void) printf(gettext(" 22 Received properties\n"));
+ (void) printf(gettext(" 23 Slim ZIL\n"));
+ (void) printf(gettext(" 24 System attributes\n"));
+ (void) printf(gettext(" 25 Improved scrub stats\n"));
+ (void) printf(gettext(" 26 Improved snapshot deletion "
+ "performance\n"));
+ (void) printf(gettext(" 27 Improved snapshot creation "
+ "performance\n"));
+ (void) printf(gettext(" 28 Multiple vdev replacements\n"));
+ (void) printf(gettext("\nFor more information on a particular "
+ "version, including supported releases,\n"));
+ (void) printf(gettext("see the ZFS Administration Guide.\n\n"));
} else if (argc == 0) {
int notfound;
@@ -3624,47 +4139,6 @@ typedef struct hist_cbdata {
int internal;
} hist_cbdata_t;
-char *hist_event_table[LOG_END] = {
- "invalid event",
- "pool create",
- "vdev add",
- "pool remove",
- "pool destroy",
- "pool export",
- "pool import",
- "vdev attach",
- "vdev replace",
- "vdev detach",
- "vdev online",
- "vdev offline",
- "vdev upgrade",
- "pool clear",
- "pool scrub",
- "pool property set",
- "create",
- "clone",
- "destroy",
- "destroy_begin_sync",
- "inherit",
- "property set",
- "quota set",
- "permission update",
- "permission remove",
- "permission who remove",
- "promote",
- "receive",
- "rename",
- "reservation set",
- "replay_inc_sync",
- "replay_full_sync",
- "rollback",
- "snapshot",
- "filesystem version upgrade",
- "refquota set",
- "refreservation set",
- "pool scrub done",
-};
-
/*
* Print out the command history for a specific pool.
*/
@@ -3722,7 +4196,7 @@ get_history_one(zpool_handle_t *zhp, void *data)
(void) snprintf(internalstr,
sizeof (internalstr),
"[internal %s txg:%lld] %s",
- hist_event_table[ievent], txg,
+ zfs_history_event_names[ievent], txg,
pathstr);
cmdstr = internalstr;
}
@@ -3834,7 +4308,8 @@ get_callback(zpool_handle_t *zhp, void *data)
continue;
zprop_print_one_property(zpool_get_name(zhp), cbp,
- zpool_prop_to_name(pl->pl_prop), value, srctype, NULL);
+ zpool_prop_to_name(pl->pl_prop), value, srctype, NULL,
+ NULL);
}
return (0);
}
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
index f44da4ff60f5..c7a002efb17c 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <errno.h>
#include <libgen.h>
#include <libintl.h>
@@ -51,22 +49,6 @@ safe_malloc(size_t size)
}
/*
- * Same as above, but for strdup()
- */
-char *
-safe_strdup(const char *str)
-{
- char *ret;
-
- if ((ret = strdup(str)) == NULL) {
- (void) fprintf(stderr, "internal error: out of memory\n");
- exit(1);
- }
-
- return (ret);
-}
-
-/*
* Display an out of memory error message and abort the current program.
*/
void
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
index e82f3202af2a..134c730fcf8e 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef ZPOOL_UTIL_H
@@ -37,7 +36,6 @@ extern "C" {
* Basic utility functions
*/
void *safe_malloc(size_t);
-char *safe_strdup(const char *);
void zpool_no_memory(void);
uint_t num_logs(nvlist_t *nv);
@@ -46,7 +44,9 @@ uint_t num_logs(nvlist_t *nv);
*/
nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
- boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
+ boolean_t replacing, boolean_t dryrun, int argc, char **argv);
+nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
+ nvlist_t *props, splitflags_t flags, int argc, char **argv);
/*
* Pool list functions
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
index 35a636c91128..5ffd39ac8fe6 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -66,6 +65,7 @@
#include <fcntl.h>
#include <libintl.h>
#include <libnvpair.h>
+#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
@@ -77,6 +77,10 @@
#include "zpool_util.h"
+#define DISK_ROOT "/dev/dsk"
+#define RDISK_ROOT "/dev/rdsk"
+#define BACKUP_SLICE "s2"
+
/*
* For any given vdev specification, we can have multiple errors. The
* vdev_error() function keeps track of whether we have seen an error yet, and
@@ -107,6 +111,170 @@ vdev_error(const char *fmt, ...)
va_end(ap);
}
+#ifdef sun
+static void
+libdiskmgt_error(int error)
+{
+ /*
+ * ENXIO/ENODEV is a valid error message if the device doesn't live in
+ * /dev/dsk. Don't bother printing an error message in this case.
+ */
+ if (error == ENXIO || error == ENODEV)
+ return;
+
+ (void) fprintf(stderr, gettext("warning: device in use checking "
+ "failed: %s\n"), strerror(error));
+}
+
+/*
+ * Validate a device, passing the bulk of the work off to libdiskmgt.
+ */
+static int
+check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
+{
+ char *msg;
+ int error = 0;
+ dm_who_type_t who;
+
+ if (force)
+ who = DM_WHO_ZPOOL_FORCE;
+ else if (isspare)
+ who = DM_WHO_ZPOOL_SPARE;
+ else
+ who = DM_WHO_ZPOOL;
+
+ if (dm_inuse((char *)path, &msg, who, &error) || error) {
+ if (error != 0) {
+ libdiskmgt_error(error);
+ return (0);
+ } else {
+ vdev_error("%s", msg);
+ free(msg);
+ return (-1);
+ }
+ }
+
+ /*
+ * If we're given a whole disk, ignore overlapping slices since we're
+ * about to label it anyway.
+ */
+ error = 0;
+ if (!wholedisk && !force &&
+ (dm_isoverlapping((char *)path, &msg, &error) || error)) {
+ if (error == 0) {
+ /* dm_isoverlapping returned -1 */
+ vdev_error(gettext("%s overlaps with %s\n"), path, msg);
+ free(msg);
+ return (-1);
+ } else if (error != ENODEV) {
+ /* libdiskmgt's devcache only handles physical drives */
+ libdiskmgt_error(error);
+ return (0);
+ }
+ }
+
+ return (0);
+}
+
+
+/*
+ * Validate a whole disk. Iterate over all slices on the disk and make sure
+ * that none is in use by calling check_slice().
+ */
+static int
+check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
+{
+ dm_descriptor_t *drive, *media, *slice;
+ int err = 0;
+ int i;
+ int ret;
+
+ /*
+ * Get the drive associated with this disk. This should never fail,
+ * because we already have an alias handle open for the device.
+ */
+ if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
+ &err)) == NULL || *drive == NULL) {
+ if (err)
+ libdiskmgt_error(err);
+ return (0);
+ }
+
+ if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
+ &err)) == NULL) {
+ dm_free_descriptors(drive);
+ if (err)
+ libdiskmgt_error(err);
+ return (0);
+ }
+
+ dm_free_descriptors(drive);
+
+ /*
+ * It is possible that the user has specified a removable media drive,
+ * and the media is not present.
+ */
+ if (*media == NULL) {
+ dm_free_descriptors(media);
+ vdev_error(gettext("'%s' has no media in drive\n"), name);
+ return (-1);
+ }
+
+ if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
+ &err)) == NULL) {
+ dm_free_descriptors(media);
+ if (err)
+ libdiskmgt_error(err);
+ return (0);
+ }
+
+ dm_free_descriptors(media);
+
+ ret = 0;
+
+ /*
+ * Iterate over all slices and report any errors. We don't care about
+ * overlapping slices because we are using the whole disk.
+ */
+ for (i = 0; slice[i] != NULL; i++) {
+ char *name = dm_get_name(slice[i], &err);
+
+ if (check_slice(name, force, B_TRUE, isspare) != 0)
+ ret = -1;
+
+ dm_free_name(name);
+ }
+
+ dm_free_descriptors(slice);
+ return (ret);
+}
+
+/*
+ * Validate a device.
+ */
+static int
+check_device(const char *path, boolean_t force, boolean_t isspare)
+{
+ dm_descriptor_t desc;
+ int err;
+ char *dev;
+
+ /*
+ * For whole disks, libdiskmgt does not include the leading dev path.
+ */
+ dev = strrchr(path, '/');
+ assert(dev != NULL);
+ dev++;
+ if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
+ err = check_disk(path, desc, force, isspare);
+ dm_free_descriptor(desc);
+ return (err);
+ }
+
+ return (check_slice(path, force, B_FALSE, isspare));
+}
+#endif /* sun */
+
/*
* Check that a file is valid. All we can do in this case is check that it's
* not in use by another pool, and not in use by swap.
@@ -121,7 +289,7 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
pool_state_t state;
boolean_t inuse;
-#if 0
+#ifdef sun
if (dm_inuse_swap(file, &err)) {
if (err)
libdiskmgt_error(err);
@@ -185,7 +353,7 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
}
static int
-check_provider(const char *name, boolean_t force, boolean_t isspare)
+check_device(const char *name, boolean_t force, boolean_t isspare)
{
char path[MAXPATHLEN];
@@ -206,24 +374,44 @@ check_provider(const char *name, boolean_t force, boolean_t isspare)
* it isn't.
*/
static boolean_t
-is_whole_disk(const char *name)
+is_whole_disk(const char *arg)
{
+#ifdef sun
+ struct dk_gpt *label;
+ int fd;
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "%s%s%s",
+ RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
+ if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
+ return (B_FALSE);
+ if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+ efi_free(label);
+ (void) close(fd);
+ return (B_TRUE);
+#else
int fd;
- fd = g_open(name, 0);
+ fd = g_open(arg, 0);
if (fd >= 0) {
g_close(fd);
return (B_TRUE);
}
return (B_FALSE);
+#endif
}
/*
- * Create a leaf vdev. Determine if this is a GEOM provider.
- * Valid forms for a leaf vdev are:
+ * Create a leaf vdev. Determine if this is a file or a device. If it's a
+ * device, fill in the device id to make a complete nvlist. Valid forms for a
+ * leaf vdev are:
*
- * /dev/xxx Complete path to a GEOM provider
- * xxx Shorthand for /dev/xxx
+ * /dev/dsk/xxx Complete disk path
+ * /xxx Full path to file
+ * xxx Shorthand for /dev/dsk/xxx
*/
static nvlist_t *
make_leaf_vdev(const char *arg, uint64_t is_log)
@@ -290,10 +478,18 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
}
}
+#ifdef __FreeBSD__
+ if (S_ISCHR(statbuf.st_mode)) {
+ statbuf.st_mode &= ~S_IFCHR;
+ statbuf.st_mode |= S_IFBLK;
+ wholedisk = B_FALSE;
+ }
+#endif
+
/*
* Determine whether this is a device or a file.
*/
- if (wholedisk) {
+ if (wholedisk || S_ISBLK(statbuf.st_mode)) {
type = VDEV_TYPE_DISK;
} else if (S_ISREG(statbuf.st_mode)) {
type = VDEV_TYPE_FILE;
@@ -314,12 +510,12 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
- (uint64_t)B_FALSE) == 0);
+ (uint64_t)wholedisk) == 0);
/*
* For a whole disk, defer getting its devid until after labeling it.
*/
- if (1 || (S_ISBLK(statbuf.st_mode) && !wholedisk)) {
+ if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
/*
* Get the devid for the device.
*/
@@ -527,16 +723,14 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
*/
if ((fd = open(path, O_RDONLY)) >= 0) {
err = fstat64(fd, &statbuf);
- if (err == 0 &&
- S_ISCHR(statbuf.st_mode)) {
- err = ioctl(fd, DIOCGMEDIASIZE,
- &statbuf.st_size);
- }
(void) close(fd);
} else {
err = stat64(path, &statbuf);
}
- if (err != 0 || statbuf.st_size == 0)
+
+ if (err != 0 ||
+ statbuf.st_size == 0 ||
+ statbuf.st_size == MAXOFFSET_T)
continue;
size = statbuf.st_size;
@@ -714,6 +908,112 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
return (ret);
}
+#ifdef sun
+/*
+ * Go through and find any whole disks in the vdev specification, labelling them
+ * as appropriate. When constructing the vdev spec, we were unable to open this
+ * device in order to provide a devid. Now that we have labelled the disk and
+ * know that slice 0 is valid, we can construct the devid now.
+ *
+ * If the disk was already labeled with an EFI label, we will have gotten the
+ * devid already (because we were able to open the whole disk). Otherwise, we
+ * need to get the devid after we label the disk.
+ */
+static int
+make_disks(zpool_handle_t *zhp, nvlist_t *nv)
+{
+ nvlist_t **child;
+ uint_t c, children;
+ char *type, *path, *diskname;
+ char buf[MAXPATHLEN];
+ uint64_t wholedisk;
+ int fd;
+ int ret;
+ ddi_devid_t devid;
+ char *minor = NULL, *devid_str = NULL;
+
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0) {
+
+ if (strcmp(type, VDEV_TYPE_DISK) != 0)
+ return (0);
+
+ /*
+ * We have a disk device. Get the path to the device
+ * and see if it's a whole disk by appending the backup
+ * slice and stat()ing the device.
+ */
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk) != 0 || !wholedisk)
+ return (0);
+
+ diskname = strrchr(path, '/');
+ assert(diskname != NULL);
+ diskname++;
+ if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
+ return (-1);
+
+ /*
+ * Fill in the devid, now that we've labeled the disk.
+ */
+ (void) snprintf(buf, sizeof (buf), "%ss0", path);
+ if ((fd = open(buf, O_RDONLY)) < 0) {
+ (void) fprintf(stderr,
+ gettext("cannot open '%s': %s\n"),
+ buf, strerror(errno));
+ return (-1);
+ }
+
+ if (devid_get(fd, &devid) == 0) {
+ if (devid_get_minor_name(fd, &minor) == 0 &&
+ (devid_str = devid_str_encode(devid, minor)) !=
+ NULL) {
+ verify(nvlist_add_string(nv,
+ ZPOOL_CONFIG_DEVID, devid_str) == 0);
+ }
+ if (devid_str != NULL)
+ devid_str_free(devid_str);
+ if (minor != NULL)
+ devid_str_free(minor);
+ devid_free(devid);
+ }
+
+ /*
+ * Update the path to refer to the 's0' slice. The presence of
+ * the 'whole_disk' field indicates to the CLI that we should
+ * chop off the slice number when displaying the device in
+ * future output.
+ */
+ verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
+
+ (void) close(fd);
+
+ return (0);
+ }
+
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(zhp, child[c])) != 0)
+ return (ret);
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(zhp, child[c])) != 0)
+ return (ret);
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(zhp, child[c])) != 0)
+ return (ret);
+
+ return (0);
+}
+#endif /* sun */
+
/*
* Determine if the given path is a hot spare within the given configuration.
*/
@@ -742,8 +1042,8 @@ is_spare(nvlist_t *config, const char *path)
return (B_FALSE);
}
free(name);
-
(void) close(fd);
+
verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
nvlist_free(label);
@@ -767,8 +1067,8 @@ is_spare(nvlist_t *config, const char *path)
* the majority of this task.
*/
static int
-check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
- int isspare)
+check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
+ boolean_t replacing, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
@@ -789,14 +1089,22 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
* hot spare within the same pool. If so, we allow it
* regardless of what libdiskmgt or zpool_in_use() says.
*/
- if (isreplacing) {
- (void) strlcpy(buf, path, sizeof (buf));
+ if (replacing) {
+#ifdef sun
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk) == 0 && wholedisk)
+ (void) snprintf(buf, sizeof (buf), "%ss0",
+ path);
+ else
+#endif
+ (void) strlcpy(buf, path, sizeof (buf));
+
if (is_spare(config, buf))
return (0);
}
if (strcmp(type, VDEV_TYPE_DISK) == 0)
- ret = check_provider(path, force, isspare);
+ ret = check_device(path, force, isspare);
if (strcmp(type, VDEV_TYPE_FILE) == 0)
ret = check_file(path, force, isspare);
@@ -806,41 +1114,56 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_FALSE)) != 0)
+ replacing, B_FALSE)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_TRUE)) != 0)
+ replacing, B_TRUE)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_FALSE)) != 0)
+ replacing, B_FALSE)) != 0)
return (ret);
return (0);
}
static const char *
-is_grouping(const char *type, int *mindev)
+is_grouping(const char *type, int *mindev, int *maxdev)
{
- if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
- if (mindev != NULL)
- *mindev = 2;
- return (VDEV_TYPE_RAIDZ);
- }
+ if (strncmp(type, "raidz", 5) == 0) {
+ const char *p = type + 5;
+ char *end;
+ long nparity;
+
+ if (*p == '\0') {
+ nparity = 1;
+ } else if (*p == '0') {
+ return (NULL); /* no zero prefixes allowed */
+ } else {
+ errno = 0;
+ nparity = strtol(p, &end, 10);
+ if (errno != 0 || nparity < 1 || nparity >= 255 ||
+ *end != '\0')
+ return (NULL);
+ }
- if (strcmp(type, "raidz2") == 0) {
if (mindev != NULL)
- *mindev = 3;
+ *mindev = nparity + 1;
+ if (maxdev != NULL)
+ *maxdev = 255;
return (VDEV_TYPE_RAIDZ);
}
+ if (maxdev != NULL)
+ *maxdev = INT_MAX;
+
if (strcmp(type, "mirror") == 0) {
if (mindev != NULL)
*mindev = 2;
@@ -878,7 +1201,7 @@ nvlist_t *
construct_spec(int argc, char **argv)
{
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
- int t, toplevels, mindev, nspares, nlogs, nl2cache;
+ int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
const char *type;
uint64_t is_log;
boolean_t seen_logs;
@@ -900,7 +1223,7 @@ construct_spec(int argc, char **argv)
* If it's a mirror or raidz, the subsequent arguments are
* its leaves -- until we encounter the next mirror or raidz.
*/
- if ((type = is_grouping(argv[0], &mindev)) != NULL) {
+ if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
nvlist_t **child = NULL;
int c, children = 0;
@@ -957,7 +1280,7 @@ construct_spec(int argc, char **argv)
}
for (c = 1; c < argc; c++) {
- if (is_grouping(argv[c], NULL) != NULL)
+ if (is_grouping(argv[c], NULL, NULL) != NULL)
break;
children++;
child = realloc(child,
@@ -977,6 +1300,13 @@ construct_spec(int argc, char **argv)
return (NULL);
}
+ if (children > maxdev) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s supports no more than "
+ "%d devices\n"), argv[0], maxdev);
+ return (NULL);
+ }
+
argc -= c;
argv += c;
@@ -1071,6 +1401,54 @@ construct_spec(int argc, char **argv)
return (nvroot);
}
+nvlist_t *
+split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
+ splitflags_t flags, int argc, char **argv)
+{
+ nvlist_t *newroot = NULL, **child;
+ uint_t c, children;
+
+ if (argc > 0) {
+ if ((newroot = construct_spec(argc, argv)) == NULL) {
+ (void) fprintf(stderr, gettext("Unable to build a "
+ "pool from the specified devices\n"));
+ return (NULL);
+ }
+
+#ifdef sun
+ if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
+ nvlist_free(newroot);
+ return (NULL);
+ }
+#endif
+
+ /* avoid any tricks in the spec */
+ verify(nvlist_lookup_nvlist_array(newroot,
+ ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
+ for (c = 0; c < children; c++) {
+ char *path;
+ const char *type;
+ int min, max;
+
+ verify(nvlist_lookup_string(child[c],
+ ZPOOL_CONFIG_PATH, &path) == 0);
+ if ((type = is_grouping(path, &min, &max)) != NULL) {
+ (void) fprintf(stderr, gettext("Cannot use "
+ "'%s' as a device for splitting\n"), type);
+ nvlist_free(newroot);
+ return (NULL);
+ }
+ }
+ }
+
+ if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
+ if (newroot != NULL)
+ nvlist_free(newroot);
+ return (NULL);
+ }
+
+ return (newroot);
+}
/*
* Get and validate the contents of the given vdev specification. This ensures
@@ -1084,7 +1462,7 @@ construct_spec(int argc, char **argv)
*/
nvlist_t *
make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
- boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
+ boolean_t replacing, boolean_t dryrun, int argc, char **argv)
{
nvlist_t *newroot;
nvlist_t *poolconfig = NULL;
@@ -1107,8 +1485,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
* uses (such as a dedicated dump device) that even '-f' cannot
* override.
*/
- if (check_in_use(poolconfig, newroot, force, isreplacing,
- B_FALSE) != 0) {
+ if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
nvlist_free(newroot);
return (NULL);
}
@@ -1123,5 +1500,15 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
return (NULL);
}
+#ifdef sun
+ /*
+ * Run through the vdev specification and label any whole disks found.
+ */
+ if (!dryrun && make_disks(zhp, newroot) != 0) {
+ nvlist_free(newroot);
+ return (NULL);
+ }
+#endif
+
return (newroot);
}
diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1 b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
new file mode 100644
index 000000000000..9e11948becf4
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
@@ -0,0 +1,67 @@
+'\" te
+.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
+.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH zstreamdump 1M "21 Sep 2009" "SunOS 5.11" "System Administration Commands"
+.SH NAME
+zstreamdump \- filter data in zfs send stream
+.SH SYNOPSIS
+.LP
+.nf
+\fBzstreamdump\fR [\fB-C\fR] [\fB-v\fR]
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBzstreamdump\fR utility reads from the output of the \fBzfs send\fR command, then displays headers and some statistics from that output. See \fBzfs\fR(1M).
+.SH OPTIONS
+.sp
+.LP
+The following options are supported:
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-C\fR\fR
+.ad
+.sp .6
+.RS 4n
+Suppress the validation of checksums.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.sp .6
+.RS 4n
+Verbose. Dump all headers, not only begin and end headers.
+.RE
+
+.SH ATTRIBUTES
+.sp
+.LP
+See \fBattributes\fR(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+tab() box;
+cw(2.75i) |cw(2.75i)
+lw(2.75i) |lw(2.75i)
+.
+ATTRIBUTE TYPEATTRIBUTE VALUE
+_
+AvailabilitySUNWzfsu
+_
+Interface StabilityUncommitted
+.TE
+
+.SH SEE ALSO
+.sp
+.LP
+\fBzfs\fR(1M), \fBattributes\fR(5)
diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
new file mode 100644
index 000000000000..df23cc1e5a38
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
@@ -0,0 +1,429 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <libnvpair.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/dmu.h>
+#include <sys/zfs_ioctl.h>
+#include <zfs_fletcher.h>
+
+uint64_t drr_record_count[DRR_NUMTYPES];
+uint64_t total_write_size = 0;
+uint64_t total_stream_len = 0;
+FILE *send_stream = 0;
+boolean_t do_byteswap = B_FALSE;
+boolean_t do_cksum = B_TRUE;
+#define INITIAL_BUFLEN (1<<20)
+
+static void
+usage(void)
+{
+ (void) fprintf(stderr, "usage: zstreamdump [-v] [-C] < file\n");
+ (void) fprintf(stderr, "\t -v -- verbose\n");
+ (void) fprintf(stderr, "\t -C -- suppress checksum verification\n");
+ exit(1);
+}
+
+/*
+ * ssread - send stream read.
+ *
+ * Read while computing incremental checksum
+ */
+
+static size_t
+ssread(void *buf, size_t len, zio_cksum_t *cksum)
+{
+ size_t outlen;
+
+ if ((outlen = fread(buf, len, 1, send_stream)) == 0)
+ return (0);
+
+ if (do_cksum && cksum) {
+ if (do_byteswap)
+ fletcher_4_incremental_byteswap(buf, len, cksum);
+ else
+ fletcher_4_incremental_native(buf, len, cksum);
+ }
+ total_stream_len += len;
+ return (outlen);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *buf = malloc(INITIAL_BUFLEN);
+ dmu_replay_record_t thedrr;
+ dmu_replay_record_t *drr = &thedrr;
+ struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
+ struct drr_end *drre = &thedrr.drr_u.drr_end;
+ struct drr_object *drro = &thedrr.drr_u.drr_object;
+ struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects;
+ struct drr_write *drrw = &thedrr.drr_u.drr_write;
+ struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
+ struct drr_free *drrf = &thedrr.drr_u.drr_free;
+ struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
+ char c;
+ boolean_t verbose = B_FALSE;
+ boolean_t first = B_TRUE;
+ int err;
+ zio_cksum_t zc = { 0 };
+ zio_cksum_t pcksum = { 0 };
+
+ while ((c = getopt(argc, argv, ":vC")) != -1) {
+ switch (c) {
+ case 'C':
+ do_cksum = B_FALSE;
+ break;
+ case 'v':
+ verbose = B_TRUE;
+ break;
+ case ':':
+ (void) fprintf(stderr,
+ "missing argument for '%c' option\n", optopt);
+ usage();
+ break;
+ case '?':
+ (void) fprintf(stderr, "invalid option '%c'\n",
+ optopt);
+ usage();
+ }
+ }
+
+ if (isatty(STDIN_FILENO)) {
+ (void) fprintf(stderr,
+ "Error: Backup stream can not be read "
+ "from a terminal.\n"
+ "You must redirect standard input.\n");
+ exit(1);
+ }
+
+ send_stream = stdin;
+ pcksum = zc;
+ while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+
+ if (first) {
+ if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+ do_byteswap = B_TRUE;
+ if (do_cksum) {
+ ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
+ /*
+ * recalculate header checksum now
+ * that we know it needs to be
+ * byteswapped.
+ */
+ fletcher_4_incremental_byteswap(drr,
+ sizeof (dmu_replay_record_t), &zc);
+ }
+ } else if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
+ (void) fprintf(stderr, "Invalid stream "
+ "(bad magic number)\n");
+ exit(1);
+ }
+ first = B_FALSE;
+ }
+ if (do_byteswap) {
+ drr->drr_type = BSWAP_32(drr->drr_type);
+ drr->drr_payloadlen =
+ BSWAP_32(drr->drr_payloadlen);
+ }
+
+ /*
+ * At this point, the leading fields of the replay record
+ * (drr_type and drr_payloadlen) have been byte-swapped if
+ * necessary, but the rest of the data structure (the
+ * union of type-specific structures) is still in its
+ * original state.
+ */
+ if (drr->drr_type >= DRR_NUMTYPES) {
+ (void) printf("INVALID record found: type 0x%x\n",
+ drr->drr_type);
+ (void) printf("Aborting.\n");
+ exit(1);
+ }
+
+ drr_record_count[drr->drr_type]++;
+
+ switch (drr->drr_type) {
+ case DRR_BEGIN:
+ if (do_byteswap) {
+ drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+ drrb->drr_versioninfo =
+ BSWAP_64(drrb->drr_versioninfo);
+ drrb->drr_creation_time =
+ BSWAP_64(drrb->drr_creation_time);
+ drrb->drr_type = BSWAP_32(drrb->drr_type);
+ drrb->drr_flags = BSWAP_32(drrb->drr_flags);
+ drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+ drrb->drr_fromguid =
+ BSWAP_64(drrb->drr_fromguid);
+ }
+
+ (void) printf("BEGIN record\n");
+ (void) printf("\thdrtype = %lld\n",
+ DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo));
+ (void) printf("\tfeatures = %llx\n",
+ DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo));
+ (void) printf("\tmagic = %llx\n",
+ (u_longlong_t)drrb->drr_magic);
+ (void) printf("\tcreation_time = %llx\n",
+ (u_longlong_t)drrb->drr_creation_time);
+ (void) printf("\ttype = %u\n", drrb->drr_type);
+ (void) printf("\tflags = 0x%x\n", drrb->drr_flags);
+ (void) printf("\ttoguid = %llx\n",
+ (u_longlong_t)drrb->drr_toguid);
+ (void) printf("\tfromguid = %llx\n",
+ (u_longlong_t)drrb->drr_fromguid);
+ (void) printf("\ttoname = %s\n", drrb->drr_toname);
+ if (verbose)
+ (void) printf("\n");
+
+ if ((DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM) && drr->drr_payloadlen != 0) {
+ nvlist_t *nv;
+ int sz = drr->drr_payloadlen;
+
+ if (sz > 1<<20) {
+ free(buf);
+ buf = malloc(sz);
+ }
+ (void) ssread(buf, sz, &zc);
+ if (ferror(send_stream))
+ perror("fread");
+ err = nvlist_unpack(buf, sz, &nv, 0);
+ if (err)
+ perror(strerror(err));
+ nvlist_print(stdout, nv);
+ nvlist_free(nv);
+ }
+ break;
+
+ case DRR_END:
+ if (do_byteswap) {
+ drre->drr_checksum.zc_word[0] =
+ BSWAP_64(drre->drr_checksum.zc_word[0]);
+ drre->drr_checksum.zc_word[1] =
+ BSWAP_64(drre->drr_checksum.zc_word[1]);
+ drre->drr_checksum.zc_word[2] =
+ BSWAP_64(drre->drr_checksum.zc_word[2]);
+ drre->drr_checksum.zc_word[3] =
+ BSWAP_64(drre->drr_checksum.zc_word[3]);
+ }
+ /*
+ * We compare against the *previous* checksum
+ * value, because the stored checksum is of
+ * everything before the DRR_END record.
+ */
+ if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum,
+ pcksum)) {
+ (void) printf("Expected checksum differs from "
+ "checksum in stream.\n");
+ (void) printf("Expected checksum = "
+ "%llx/%llx/%llx/%llx\n",
+ pcksum.zc_word[0],
+ pcksum.zc_word[1],
+ pcksum.zc_word[2],
+ pcksum.zc_word[3]);
+ }
+ (void) printf("END checksum = %llx/%llx/%llx/%llx\n",
+ drre->drr_checksum.zc_word[0],
+ drre->drr_checksum.zc_word[1],
+ drre->drr_checksum.zc_word[2],
+ drre->drr_checksum.zc_word[3]);
+
+ ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
+ break;
+
+ case DRR_OBJECT:
+ if (do_byteswap) {
+ drro->drr_object = BSWAP_64(drro->drr_object);
+ drro->drr_type = BSWAP_32(drro->drr_type);
+ drro->drr_bonustype =
+ BSWAP_32(drro->drr_bonustype);
+ drro->drr_blksz = BSWAP_32(drro->drr_blksz);
+ drro->drr_bonuslen =
+ BSWAP_32(drro->drr_bonuslen);
+ drro->drr_toguid = BSWAP_64(drro->drr_toguid);
+ }
+ if (verbose) {
+ (void) printf("OBJECT object = %llu type = %u "
+ "bonustype = %u blksz = %u bonuslen = %u\n",
+ (u_longlong_t)drro->drr_object,
+ drro->drr_type,
+ drro->drr_bonustype,
+ drro->drr_blksz,
+ drro->drr_bonuslen);
+ }
+ if (drro->drr_bonuslen > 0) {
+ (void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
+ 8), &zc);
+ }
+ break;
+
+ case DRR_FREEOBJECTS:
+ if (do_byteswap) {
+ drrfo->drr_firstobj =
+ BSWAP_64(drrfo->drr_firstobj);
+ drrfo->drr_numobjs =
+ BSWAP_64(drrfo->drr_numobjs);
+ drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid);
+ }
+ if (verbose) {
+ (void) printf("FREEOBJECTS firstobj = %llu "
+ "numobjs = %llu\n",
+ (u_longlong_t)drrfo->drr_firstobj,
+ (u_longlong_t)drrfo->drr_numobjs);
+ }
+ break;
+
+ case DRR_WRITE:
+ if (do_byteswap) {
+ drrw->drr_object = BSWAP_64(drrw->drr_object);
+ drrw->drr_type = BSWAP_32(drrw->drr_type);
+ drrw->drr_offset = BSWAP_64(drrw->drr_offset);
+ drrw->drr_length = BSWAP_64(drrw->drr_length);
+ drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
+ drrw->drr_key.ddk_prop =
+ BSWAP_64(drrw->drr_key.ddk_prop);
+ }
+ if (verbose) {
+ (void) printf("WRITE object = %llu type = %u "
+ "checksum type = %u\n"
+ "offset = %llu length = %llu "
+ "props = %llx\n",
+ (u_longlong_t)drrw->drr_object,
+ drrw->drr_type,
+ drrw->drr_checksumtype,
+ (u_longlong_t)drrw->drr_offset,
+ (u_longlong_t)drrw->drr_length,
+ (u_longlong_t)drrw->drr_key.ddk_prop);
+ }
+ (void) ssread(buf, drrw->drr_length, &zc);
+ total_write_size += drrw->drr_length;
+ break;
+
+ case DRR_WRITE_BYREF:
+ if (do_byteswap) {
+ drrwbr->drr_object =
+ BSWAP_64(drrwbr->drr_object);
+ drrwbr->drr_offset =
+ BSWAP_64(drrwbr->drr_offset);
+ drrwbr->drr_length =
+ BSWAP_64(drrwbr->drr_length);
+ drrwbr->drr_toguid =
+ BSWAP_64(drrwbr->drr_toguid);
+ drrwbr->drr_refguid =
+ BSWAP_64(drrwbr->drr_refguid);
+ drrwbr->drr_refobject =
+ BSWAP_64(drrwbr->drr_refobject);
+ drrwbr->drr_refoffset =
+ BSWAP_64(drrwbr->drr_refoffset);
+ drrwbr->drr_key.ddk_prop =
+ BSWAP_64(drrwbr->drr_key.ddk_prop);
+ }
+ if (verbose) {
+ (void) printf("WRITE_BYREF object = %llu "
+ "checksum type = %u props = %llx\n"
+ "offset = %llu length = %llu\n"
+ "toguid = %llx refguid = %llx\n"
+ "refobject = %llu refoffset = %llu\n",
+ (u_longlong_t)drrwbr->drr_object,
+ drrwbr->drr_checksumtype,
+ (u_longlong_t)drrwbr->drr_key.ddk_prop,
+ (u_longlong_t)drrwbr->drr_offset,
+ (u_longlong_t)drrwbr->drr_length,
+ (u_longlong_t)drrwbr->drr_toguid,
+ (u_longlong_t)drrwbr->drr_refguid,
+ (u_longlong_t)drrwbr->drr_refobject,
+ (u_longlong_t)drrwbr->drr_refoffset);
+ }
+ break;
+
+ case DRR_FREE:
+ if (do_byteswap) {
+ drrf->drr_object = BSWAP_64(drrf->drr_object);
+ drrf->drr_offset = BSWAP_64(drrf->drr_offset);
+ drrf->drr_length = BSWAP_64(drrf->drr_length);
+ }
+ if (verbose) {
+ (void) printf("FREE object = %llu "
+ "offset = %llu length = %lld\n",
+ (u_longlong_t)drrf->drr_object,
+ (u_longlong_t)drrf->drr_offset,
+ (longlong_t)drrf->drr_length);
+ }
+ break;
+ case DRR_SPILL:
+ if (do_byteswap) {
+ drrs->drr_object = BSWAP_64(drrs->drr_object);
+ drrs->drr_length = BSWAP_64(drrs->drr_length);
+ }
+ if (verbose) {
+ (void) printf("SPILL block for object = %llu "
+ "length = %llu\n", drrs->drr_object,
+ drrs->drr_length);
+ }
+ (void) ssread(buf, drrs->drr_length, &zc);
+ break;
+ }
+ pcksum = zc;
+ }
+ free(buf);
+
+ /* Print final summary */
+
+ (void) printf("SUMMARY:\n");
+ (void) printf("\tTotal DRR_BEGIN records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_BEGIN]);
+ (void) printf("\tTotal DRR_END records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_END]);
+ (void) printf("\tTotal DRR_OBJECT records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_OBJECT]);
+ (void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
+ (void) printf("\tTotal DRR_WRITE records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_WRITE]);
+ (void) printf("\tTotal DRR_FREE records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_FREE]);
+ (void) printf("\tTotal DRR_SPILL records = %lld\n",
+ (u_longlong_t)drr_record_count[DRR_SPILL]);
+ (void) printf("\tTotal records = %lld\n",
+ (u_longlong_t)(drr_record_count[DRR_BEGIN] +
+ drr_record_count[DRR_OBJECT] +
+ drr_record_count[DRR_FREEOBJECTS] +
+ drr_record_count[DRR_WRITE] +
+ drr_record_count[DRR_FREE] +
+ drr_record_count[DRR_SPILL] +
+ drr_record_count[DRR_END]));
+ (void) printf("\tTotal write size = %lld (0x%llx)\n",
+ (u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
+ (void) printf("\tTotal stream length = %lld (0x%llx)\n",
+ (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len);
+ return (0);
+}
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index 3894f6baa543..d3502309eab5 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -86,14 +85,16 @@
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
#include <sys/zil.h>
+#include <sys/zil_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
#include <sys/spa_impl.h>
+#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
+#include <sys/dsl_scan.h>
+#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <stdio.h>
#include <stdio_ext.h>
@@ -106,6 +107,7 @@
#include <math.h>
#include <errno.h>
#include <sys/fs/zfs.h>
+#include <libnvpair.h>
static char cmdname[] = "ztest";
static char *zopt_pool = cmdname;
@@ -126,144 +128,231 @@ static int zopt_verbose = 0;
static int zopt_init = 1;
static char *zopt_dir = "/tmp";
static uint64_t zopt_time = 300; /* 5 minutes */
-static int zopt_maxfaults;
+static uint64_t zopt_maxloops = 50; /* max loops during spa_freeze() */
+
+#define BT_MAGIC 0x123456789abcdefULL
+#define MAXFAULTS() (MAX(zs->zs_mirrors, 1) * (zopt_raidz_parity + 1) - 1)
+
+enum ztest_io_type {
+ ZTEST_IO_WRITE_TAG,
+ ZTEST_IO_WRITE_PATTERN,
+ ZTEST_IO_WRITE_ZEROES,
+ ZTEST_IO_TRUNCATE,
+ ZTEST_IO_SETATTR,
+ ZTEST_IO_TYPES
+};
typedef struct ztest_block_tag {
+ uint64_t bt_magic;
uint64_t bt_objset;
uint64_t bt_object;
uint64_t bt_offset;
+ uint64_t bt_gen;
uint64_t bt_txg;
- uint64_t bt_thread;
- uint64_t bt_seq;
+ uint64_t bt_crtxg;
} ztest_block_tag_t;
-typedef struct ztest_args {
- char za_pool[MAXNAMELEN];
- spa_t *za_spa;
- objset_t *za_os;
- zilog_t *za_zilog;
- thread_t za_thread;
- uint64_t za_instance;
- uint64_t za_random;
- uint64_t za_diroff;
- uint64_t za_diroff_shared;
- uint64_t za_zil_seq;
- hrtime_t za_start;
- hrtime_t za_stop;
- hrtime_t za_kill;
- /*
- * Thread-local variables can go here to aid debugging.
- */
- ztest_block_tag_t za_rbt;
- ztest_block_tag_t za_wbt;
- dmu_object_info_t za_doi;
- dmu_buf_t *za_dbuf;
-} ztest_args_t;
-
-typedef void ztest_func_t(ztest_args_t *);
+typedef struct bufwad {
+ uint64_t bw_index;
+ uint64_t bw_txg;
+ uint64_t bw_data;
+} bufwad_t;
+
+/*
+ * XXX -- fix zfs range locks to be generic so we can use them here.
+ */
+typedef enum {
+ RL_READER,
+ RL_WRITER,
+ RL_APPEND
+} rl_type_t;
+
+typedef struct rll {
+ void *rll_writer;
+ int rll_readers;
+ mutex_t rll_lock;
+ cond_t rll_cv;
+} rll_t;
+
+typedef struct rl {
+ uint64_t rl_object;
+ uint64_t rl_offset;
+ uint64_t rl_size;
+ rll_t *rl_lock;
+} rl_t;
+
+#define ZTEST_RANGE_LOCKS 64
+#define ZTEST_OBJECT_LOCKS 64
+
+/*
+ * Object descriptor. Used as a template for object lookup/create/remove.
+ */
+typedef struct ztest_od {
+ uint64_t od_dir;
+ uint64_t od_object;
+ dmu_object_type_t od_type;
+ dmu_object_type_t od_crtype;
+ uint64_t od_blocksize;
+ uint64_t od_crblocksize;
+ uint64_t od_gen;
+ uint64_t od_crgen;
+ char od_name[MAXNAMELEN];
+} ztest_od_t;
+
+/*
+ * Per-dataset state.
+ */
+typedef struct ztest_ds {
+ objset_t *zd_os;
+ zilog_t *zd_zilog;
+ uint64_t zd_seq;
+ ztest_od_t *zd_od; /* debugging aid */
+ char zd_name[MAXNAMELEN];
+ mutex_t zd_dirobj_lock;
+ rll_t zd_object_lock[ZTEST_OBJECT_LOCKS];
+ rll_t zd_range_lock[ZTEST_RANGE_LOCKS];
+} ztest_ds_t;
+
+/*
+ * Per-iteration state.
+ */
+typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
+
+typedef struct ztest_info {
+ ztest_func_t *zi_func; /* test function */
+ uint64_t zi_iters; /* iterations per execution */
+ uint64_t *zi_interval; /* execute every <interval> seconds */
+ uint64_t zi_call_count; /* per-pass count */
+ uint64_t zi_call_time; /* per-pass time */
+ uint64_t zi_call_next; /* next time to call this function */
+} ztest_info_t;
/*
* Note: these aren't static because we want dladdr() to work.
*/
ztest_func_t ztest_dmu_read_write;
-ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
+ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
-ztest_func_t ztest_fzap;
ztest_func_t ztest_zap_parallel;
-ztest_func_t ztest_traverse;
-ztest_func_t ztest_dsl_prop_get_set;
+ztest_func_t ztest_zil_commit;
+ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_objset_create_destroy;
+ztest_func_t ztest_dmu_prealloc;
+ztest_func_t ztest_fzap;
ztest_func_t ztest_dmu_snapshot_create_destroy;
-ztest_func_t ztest_dsl_dataset_promote_busy;
+ztest_func_t ztest_dsl_prop_get_set;
+ztest_func_t ztest_spa_prop_get_set;
ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
+ztest_func_t ztest_ddt_repair;
+ztest_func_t ztest_dmu_snapshot_hold;
ztest_func_t ztest_spa_rename;
+ztest_func_t ztest_scrub;
+ztest_func_t ztest_dsl_dataset_promote_busy;
ztest_func_t ztest_vdev_attach_detach;
ztest_func_t ztest_vdev_LUN_growth;
ztest_func_t ztest_vdev_add_remove;
ztest_func_t ztest_vdev_aux_add_remove;
-ztest_func_t ztest_scrub;
+ztest_func_t ztest_split_pool;
-typedef struct ztest_info {
- ztest_func_t *zi_func; /* test function */
- uint64_t zi_iters; /* iterations per execution */
- uint64_t *zi_interval; /* execute every <interval> seconds */
- uint64_t zi_calls; /* per-pass count */
- uint64_t zi_call_time; /* per-pass time */
- uint64_t zi_call_total; /* cumulative total */
- uint64_t zi_call_target; /* target cumulative total */
-} ztest_info_t;
-
-uint64_t zopt_always = 0; /* all the time */
-uint64_t zopt_often = 1; /* every second */
-uint64_t zopt_sometimes = 10; /* every 10 seconds */
-uint64_t zopt_rarely = 60; /* every 60 seconds */
+uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
+uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
+uint64_t zopt_often = 1ULL * NANOSEC; /* every second */
+uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */
+uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */
ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
- { ztest_dmu_read_write_zcopy, 1, &zopt_always },
- { ztest_dmu_write_parallel, 30, &zopt_always },
+ { ztest_dmu_write_parallel, 10, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
+ { ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
- { ztest_fzap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
- { ztest_dsl_prop_get_set, 1, &zopt_sometimes },
- { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
- { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
- { ztest_spa_create_destroy, 1, &zopt_sometimes },
+ { ztest_split_pool, 1, &zopt_always },
+ { ztest_zil_commit, 1, &zopt_incessant },
+ { ztest_dmu_read_write_zcopy, 1, &zopt_often },
+ { ztest_dmu_objset_create_destroy, 1, &zopt_often },
+ { ztest_dsl_prop_get_set, 1, &zopt_often },
+ { ztest_spa_prop_get_set, 1, &zopt_sometimes },
+#if 0
+ { ztest_dmu_prealloc, 1, &zopt_sometimes },
+#endif
+ { ztest_fzap, 1, &zopt_sometimes },
+ { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
+ { ztest_spa_create_destroy, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_sometimes },
+ { ztest_ddt_repair, 1, &zopt_sometimes },
+ { ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
{ ztest_spa_rename, 1, &zopt_rarely },
- { ztest_vdev_attach_detach, 1, &zopt_rarely },
- { ztest_vdev_LUN_growth, 1, &zopt_rarely },
+ { ztest_scrub, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
- { ztest_vdev_add_remove, 1, &zopt_vdevtime },
+ { ztest_vdev_attach_detach, 1, &zopt_rarely },
+ { ztest_vdev_LUN_growth, 1, &zopt_rarely },
+ { ztest_vdev_add_remove, 1, &zopt_vdevtime },
{ ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
- { ztest_scrub, 1, &zopt_vdevtime },
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
-#define ZTEST_SYNC_LOCKS 16
+/*
+ * The following struct is used to hold a list of uncalled commit callbacks.
+ * The callbacks are ordered by txg number.
+ */
+typedef struct ztest_cb_list {
+ mutex_t zcl_callbacks_lock;
+ list_t zcl_callbacks;
+} ztest_cb_list_t;
/*
* Stuff we need to share writably between parent and child.
*/
typedef struct ztest_shared {
- mutex_t zs_vdev_lock;
- rwlock_t zs_name_lock;
- uint64_t zs_vdev_primaries;
- uint64_t zs_vdev_aux;
+ char *zs_pool;
+ spa_t *zs_spa;
+ hrtime_t zs_proc_start;
+ hrtime_t zs_proc_stop;
+ hrtime_t zs_thread_start;
+ hrtime_t zs_thread_stop;
+ hrtime_t zs_thread_kill;
uint64_t zs_enospc_count;
- hrtime_t zs_start_time;
- hrtime_t zs_stop_time;
+ uint64_t zs_vdev_next_leaf;
+ uint64_t zs_vdev_aux;
uint64_t zs_alloc;
uint64_t zs_space;
+ mutex_t zs_vdev_lock;
+ rwlock_t zs_name_lock;
ztest_info_t zs_info[ZTEST_FUNCS];
- mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
- uint64_t zs_seq[ZTEST_SYNC_LOCKS];
+ uint64_t zs_splits;
+ uint64_t zs_mirrors;
+ ztest_ds_t zs_zd[];
} ztest_shared_t;
+#define ID_PARALLEL -1ULL
+
static char ztest_dev_template[] = "%s/%s.%llua";
static char ztest_aux_template[] = "%s/%s.%s.%llu";
-static ztest_shared_t *ztest_shared;
+ztest_shared_t *ztest_shared;
+uint64_t *ztest_seq;
static int ztest_random_fd;
static int ztest_dump_core = 1;
-static uint64_t metaslab_sz;
static boolean_t ztest_exiting;
+/* Global commit callback list */
+static ztest_cb_list_t zcl;
+
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
+static uint64_t metaslab_sz;
-#define ZTEST_DIROBJ 1
-#define ZTEST_MICROZAP_OBJ 2
-#define ZTEST_FATZAP_OBJ 3
-
-#define ZTEST_DIROBJ_BLOCKSIZE (1 << 10)
-#define ZTEST_DIRSIZE 256
+enum ztest_object {
+ ZTEST_META_DNODE = 0,
+ ZTEST_DIROBJ,
+ ZTEST_OBJECTS
+};
static void usage(boolean_t) __NORETURN;
@@ -381,21 +470,22 @@ usage(boolean_t requested)
(void) fprintf(fp, "Usage: %s\n"
"\t[-v vdevs (default: %llu)]\n"
"\t[-s size_of_each_vdev (default: %s)]\n"
- "\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
+ "\t[-a alignment_shift (default: %d)] use 0 for random\n"
"\t[-m mirror_copies (default: %d)]\n"
"\t[-r raidz_disks (default: %d)]\n"
"\t[-R raidz_parity (default: %d)]\n"
"\t[-d datasets (default: %d)]\n"
"\t[-t threads (default: %d)]\n"
"\t[-g gang_block_threshold (default: %s)]\n"
- "\t[-i initialize pool i times (default: %d)]\n"
- "\t[-k kill percentage (default: %llu%%)]\n"
+ "\t[-i init_count (default: %d)] initialize pool i times\n"
+ "\t[-k kill_percentage (default: %llu%%)]\n"
"\t[-p pool_name (default: %s)]\n"
- "\t[-f file directory for vdev files (default: %s)]\n"
- "\t[-V(erbose)] (use multiple times for ever more blather)\n"
- "\t[-E(xisting)] (use existing pool instead of creating new one)\n"
- "\t[-T time] total run time (default: %llu sec)\n"
- "\t[-P passtime] time per pass (default: %llu sec)\n"
+ "\t[-f dir (default: %s)] file directory for vdev files\n"
+ "\t[-V] verbose (use multiple times for ever more blather)\n"
+ "\t[-E] use existing pool instead of creating new one\n"
+ "\t[-T time (default: %llu sec)] total run time\n"
+ "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
+ "\t[-P passtime (default: %llu sec)] time per pass\n"
"\t[-h] (print help)\n"
"",
cmdname,
@@ -413,31 +503,11 @@ usage(boolean_t requested)
zopt_pool, /* -p */
zopt_dir, /* -f */
(u_longlong_t)zopt_time, /* -T */
+ (u_longlong_t)zopt_maxloops, /* -F */
(u_longlong_t)zopt_passtime); /* -P */
exit(requested ? 0 : 1);
}
-static uint64_t
-ztest_random(uint64_t range)
-{
- uint64_t r;
-
- if (range == 0)
- return (0);
-
- if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
- fatal(1, "short read from /dev/urandom");
-
- return (r % range);
-}
-
-/* ARGSUSED */
-static void
-ztest_record_enospc(char *s)
-{
- ztest_shared->zs_enospc_count++;
-}
-
static void
process_options(int argc, char **argv)
{
@@ -451,7 +521,7 @@ process_options(int argc, char **argv)
metaslab_gang_bang = 32 << 10;
while ((opt = getopt(argc, argv,
- "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:h")) != EOF) {
+ "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:")) != EOF) {
value = 0;
switch (opt) {
case 'v':
@@ -467,6 +537,7 @@ process_options(int argc, char **argv)
case 'k':
case 'T':
case 'P':
+ case 'F':
value = nicenumtoull(optarg);
}
switch (opt) {
@@ -486,7 +557,7 @@ process_options(int argc, char **argv)
zopt_raidz = MAX(1, value);
break;
case 'R':
- zopt_raidz_parity = MIN(MAX(value, 1), 2);
+ zopt_raidz_parity = MIN(MAX(value, 1), 3);
break;
case 'd':
zopt_datasets = MAX(1, value);
@@ -521,6 +592,9 @@ process_options(int argc, char **argv)
case 'P':
zopt_passtime = MAX(1, value);
break;
+ case 'F':
+ zopt_maxloops = MAX(1, value);
+ break;
case 'h':
usage(B_TRUE);
break;
@@ -533,8 +607,37 @@ process_options(int argc, char **argv)
zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
- zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
- zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
+ zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time * NANOSEC / zopt_vdevs :
+ UINT64_MAX >> 2);
+}
+
+static void
+ztest_kill(ztest_shared_t *zs)
+{
+ zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(zs->zs_spa));
+ zs->zs_space = metaslab_class_get_space(spa_normal_class(zs->zs_spa));
+ (void) kill(getpid(), SIGKILL);
+}
+
+static uint64_t
+ztest_random(uint64_t range)
+{
+ uint64_t r;
+
+ if (range == 0)
+ return (0);
+
+ if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
+ fatal(1, "short read from /dev/urandom");
+
+ return (r % range);
+}
+
+/* ARGSUSED */
+static void
+ztest_record_enospc(const char *s)
+{
+ ztest_shared->zs_enospc_count++;
}
static uint64_t
@@ -563,7 +666,7 @@ make_vdev_file(char *path, char *aux, size_t size, uint64_t ashift)
(void) sprintf(path, ztest_aux_template,
zopt_dir, zopt_pool, aux, vdev);
} else {
- vdev = ztest_shared->zs_vdev_primaries++;
+ vdev = ztest_shared->zs_vdev_next_leaf++;
(void) sprintf(path, ztest_dev_template,
zopt_dir, zopt_pool, vdev);
}
@@ -674,100 +777,807 @@ make_vdev_root(char *path, char *aux, size_t size, uint64_t ashift,
return (root);
}
+static int
+ztest_random_blocksize(void)
+{
+ return (1 << (SPA_MINBLOCKSHIFT +
+ ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)));
+}
+
+static int
+ztest_random_ibshift(void)
+{
+ return (DN_MIN_INDBLKSHIFT +
+ ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
+}
+
+static uint64_t
+ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
+{
+ uint64_t top;
+ vdev_t *rvd = spa->spa_root_vdev;
+ vdev_t *tvd;
+
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+
+ do {
+ top = ztest_random(rvd->vdev_children);
+ tvd = rvd->vdev_child[top];
+ } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) ||
+ tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
+
+ return (top);
+}
+
+static uint64_t
+ztest_random_dsl_prop(zfs_prop_t prop)
+{
+ uint64_t value;
+
+ do {
+ value = zfs_prop_random_value(prop, ztest_random(-1ULL));
+ } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
+
+ return (value);
+}
+
+static int
+ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
+ boolean_t inherit)
+{
+ const char *propname = zfs_prop_to_name(prop);
+ const char *valname;
+ char setpoint[MAXPATHLEN];
+ uint64_t curval;
+ int error;
+
+ error = dsl_prop_set(osname, propname,
+ (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL),
+ sizeof (value), 1, &value);
+
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ return (error);
+ }
+ ASSERT3U(error, ==, 0);
+
+ VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
+ 1, &curval, setpoint), ==, 0);
+
+ if (zopt_verbose >= 6) {
+ VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
+ (void) printf("%s %s = %s at '%s'\n",
+ osname, propname, valname, setpoint);
+ }
+
+ return (error);
+}
+
+static int
+ztest_spa_prop_set_uint64(ztest_shared_t *zs, zpool_prop_t prop, uint64_t value)
+{
+ spa_t *spa = zs->zs_spa;
+ nvlist_t *props = NULL;
+ int error;
+
+ VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
+
+ error = spa_prop_set(spa, props);
+
+ nvlist_free(props);
+
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ return (error);
+ }
+ ASSERT3U(error, ==, 0);
+
+ return (error);
+}
+
+static void
+ztest_rll_init(rll_t *rll)
+{
+ rll->rll_writer = NULL;
+ rll->rll_readers = 0;
+ VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
+ VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
+}
+
static void
-ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
+ztest_rll_destroy(rll_t *rll)
+{
+ ASSERT(rll->rll_writer == NULL);
+ ASSERT(rll->rll_readers == 0);
+ VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
+ VERIFY(cond_destroy(&rll->rll_cv) == 0);
+}
+
+static void
+ztest_rll_lock(rll_t *rll, rl_type_t type)
+{
+ VERIFY(mutex_lock(&rll->rll_lock) == 0);
+
+ if (type == RL_READER) {
+ while (rll->rll_writer != NULL)
+ (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+ rll->rll_readers++;
+ } else {
+ while (rll->rll_writer != NULL || rll->rll_readers)
+ (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+ rll->rll_writer = curthread;
+ }
+
+ VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+}
+
+static void
+ztest_rll_unlock(rll_t *rll)
+{
+ VERIFY(mutex_lock(&rll->rll_lock) == 0);
+
+ if (rll->rll_writer) {
+ ASSERT(rll->rll_readers == 0);
+ rll->rll_writer = NULL;
+ } else {
+ ASSERT(rll->rll_readers != 0);
+ ASSERT(rll->rll_writer == NULL);
+ rll->rll_readers--;
+ }
+
+ if (rll->rll_writer == NULL && rll->rll_readers == 0)
+ VERIFY(cond_broadcast(&rll->rll_cv) == 0);
+
+ VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+}
+
+static void
+ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
+{
+ rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
+
+ ztest_rll_lock(rll, type);
+}
+
+static void
+ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
+{
+ rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
+
+ ztest_rll_unlock(rll);
+}
+
+static rl_t *
+ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
+ uint64_t size, rl_type_t type)
+{
+ uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
+ rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
+ rl_t *rl;
+
+ rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
+ rl->rl_object = object;
+ rl->rl_offset = offset;
+ rl->rl_size = size;
+ rl->rl_lock = rll;
+
+ ztest_rll_lock(rll, type);
+
+ return (rl);
+}
+
+static void
+ztest_range_unlock(rl_t *rl)
+{
+ rll_t *rll = rl->rl_lock;
+
+ ztest_rll_unlock(rll);
+
+ umem_free(rl, sizeof (*rl));
+}
+
+static void
+ztest_zd_init(ztest_ds_t *zd, objset_t *os)
+{
+ zd->zd_os = os;
+ zd->zd_zilog = dmu_objset_zil(os);
+ zd->zd_seq = 0;
+ dmu_objset_name(os, zd->zd_name);
+
+ VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
+
+ for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
+ ztest_rll_init(&zd->zd_object_lock[l]);
+
+ for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
+ ztest_rll_init(&zd->zd_range_lock[l]);
+}
+
+static void
+ztest_zd_fini(ztest_ds_t *zd)
+{
+ VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
+
+ for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
+ ztest_rll_destroy(&zd->zd_object_lock[l]);
+
+ for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
+ ztest_rll_destroy(&zd->zd_range_lock[l]);
+}
+
+#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
+
+static uint64_t
+ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
{
- int bs = SPA_MINBLOCKSHIFT +
- ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
- int ibs = DN_MIN_INDBLKSHIFT +
- ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
+ uint64_t txg;
int error;
- error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
+ /*
+ * Attempt to assign tx to some transaction group.
+ */
+ error = dmu_tx_assign(tx, txg_how);
if (error) {
- char osname[300];
- dmu_objset_name(os, osname);
- fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
- osname, object, 1 << bs, ibs, error);
+ if (error == ERESTART) {
+ ASSERT(txg_how == TXG_NOWAIT);
+ dmu_tx_wait(tx);
+ } else {
+ ASSERT3U(error, ==, ENOSPC);
+ ztest_record_enospc(tag);
+ }
+ dmu_tx_abort(tx);
+ return (0);
}
+ txg = dmu_tx_get_txg(tx);
+ ASSERT(txg != 0);
+ return (txg);
+}
+
+static void
+ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
+{
+ uint64_t *ip = buf;
+ uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
+
+ while (ip < ip_end)
+ *ip++ = value;
}
-static uint8_t
-ztest_random_checksum(void)
+static boolean_t
+ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
{
- uint8_t checksum;
+ uint64_t *ip = buf;
+ uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
+ uint64_t diff = 0;
- do {
- checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
- } while (zio_checksum_table[checksum].ci_zbt);
+ while (ip < ip_end)
+ diff |= (value - *ip++);
+
+ return (diff == 0);
+}
+
+static void
+ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+{
+ bt->bt_magic = BT_MAGIC;
+ bt->bt_objset = dmu_objset_id(os);
+ bt->bt_object = object;
+ bt->bt_offset = offset;
+ bt->bt_gen = gen;
+ bt->bt_txg = txg;
+ bt->bt_crtxg = crtxg;
+}
+
+static void
+ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+{
+ ASSERT(bt->bt_magic == BT_MAGIC);
+ ASSERT(bt->bt_objset == dmu_objset_id(os));
+ ASSERT(bt->bt_object == object);
+ ASSERT(bt->bt_offset == offset);
+ ASSERT(bt->bt_gen <= gen);
+ ASSERT(bt->bt_txg <= txg);
+ ASSERT(bt->bt_crtxg == crtxg);
+}
+
+static ztest_block_tag_t *
+ztest_bt_bonus(dmu_buf_t *db)
+{
+ dmu_object_info_t doi;
+ ztest_block_tag_t *bt;
+
+ dmu_object_info_from_db(db, &doi);
+ ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
+ ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
+ bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
+
+ return (bt);
+}
+
+/*
+ * ZIL logging ops
+ */
+
+#define lrz_type lr_mode
+#define lrz_blocksize lr_uid
+#define lrz_ibshift lr_gid
+#define lrz_bonustype lr_rdev
+#define lrz_bonuslen lr_crtime[1]
+
+static void
+ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
+{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ size_t namesize = strlen(name) + 1;
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize);
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) + namesize - sizeof (lr_t));
+
+ zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+static void
+ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object)
+{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ size_t namesize = strlen(name) + 1;
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize);
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) + namesize - sizeof (lr_t));
- if (checksum == ZIO_CHECKSUM_OFF)
- checksum = ZIO_CHECKSUM_ON;
+ itx->itx_oid = object;
+ zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+static void
+ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
+{
+ itx_t *itx;
+ itx_wr_state_t write_state = ztest_random(WR_NUM_STATES);
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ if (lr->lr_length > ZIL_MAX_LOG_DATA)
+ write_state = WR_INDIRECT;
+
+ itx = zil_itx_create(TX_WRITE,
+ sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0));
+
+ if (write_state == WR_COPIED &&
+ dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
+ ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
+ zil_itx_destroy(itx);
+ itx = zil_itx_create(TX_WRITE, sizeof (*lr));
+ write_state = WR_NEED_COPY;
+ }
+ itx->itx_private = zd;
+ itx->itx_wr_state = write_state;
+ itx->itx_sync = (ztest_random(8) == 0);
+ itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0);
+
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) - sizeof (lr_t));
+
+ zil_itx_assign(zd->zd_zilog, itx, tx);
+}
- return (checksum);
+static void
+ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr)
+{
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) - sizeof (lr_t));
+
+ itx->itx_sync = B_FALSE;
+ zil_itx_assign(zd->zd_zilog, itx, tx);
}
-static uint8_t
-ztest_random_compress(void)
+static void
+ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr)
{
- return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_SETATTR, sizeof (*lr));
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) - sizeof (lr_t));
+
+ itx->itx_sync = B_FALSE;
+ zil_itx_assign(zd->zd_zilog, itx, tx);
}
+/*
+ * ZIL replay ops
+ */
static int
-ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap)
+ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap)
{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ objset_t *os = zd->zd_os;
+ ztest_block_tag_t *bbt;
+ dmu_buf_t *db;
dmu_tx_t *tx;
- int error;
+ uint64_t txg;
+ int error = 0;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
+ ASSERT(lr->lr_doid == ZTEST_DIROBJ);
+ ASSERT(name[0] != '\0');
+
tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
+
+ dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name);
+
+ if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+ } else {
+ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+ }
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0)
+ return (ENOSPC);
+
+ ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
+
+ if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
+ if (lr->lr_foid == 0) {
+ lr->lr_foid = zap_create(os,
+ lr->lrz_type, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ } else {
+ error = zap_create_claim(os, lr->lr_foid,
+ lr->lrz_type, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ }
+ } else {
+ if (lr->lr_foid == 0) {
+ lr->lr_foid = dmu_object_alloc(os,
+ lr->lrz_type, 0, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ } else {
+ error = dmu_object_claim(os, lr->lr_foid,
+ lr->lrz_type, 0, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ }
+ }
+
if (error) {
- dmu_tx_abort(tx);
+ ASSERT3U(error, ==, EEXIST);
+ ASSERT(zd->zd_zilog->zl_replay);
+ dmu_tx_commit(tx);
return (error);
}
- error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0,
- DMU_OT_NONE, 0, tx);
- ASSERT3U(error, ==, 0);
+ ASSERT(lr->lr_foid != 0);
+
+ if (lr->lrz_type != DMU_OT_ZAP_OTHER)
+ VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid,
+ lr->lrz_blocksize, lr->lrz_ibshift, tx));
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+ bbt = ztest_bt_bonus(db);
+ dmu_buf_will_dirty(db, tx);
+ ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg);
+ dmu_buf_rele(db, FTAG);
+
+ VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
+ &lr->lr_foid, tx));
+
+ (void) ztest_log_create(zd, tx, lr);
+
dmu_tx_commit(tx);
- if (zopt_verbose >= 5) {
- char osname[MAXNAMELEN];
- dmu_objset_name(os, osname);
- (void) printf("replay create of %s object %llu"
- " in txg %llu = %d\n",
- osname, (u_longlong_t)lr->lr_doid,
- (u_longlong_t)dmu_tx_get_txg(tx), error);
+ return (0);
+}
+
+static int
+ztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap)
+{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ objset_t *os = zd->zd_os;
+ dmu_object_info_t doi;
+ dmu_tx_t *tx;
+ uint64_t object, txg;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ASSERT(lr->lr_doid == ZTEST_DIROBJ);
+ ASSERT(name[0] != '\0');
+
+ VERIFY3U(0, ==,
+ zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
+ ASSERT(object != 0);
+
+ ztest_object_lock(zd, object, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_object_info(os, object, &doi));
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name);
+ dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ ztest_object_unlock(zd, object);
+ return (ENOSPC);
}
- return (error);
+ if (doi.doi_type == DMU_OT_ZAP_OTHER) {
+ VERIFY3U(0, ==, zap_destroy(os, object, tx));
+ } else {
+ VERIFY3U(0, ==, dmu_object_free(os, object, tx));
+ }
+
+ VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx));
+
+ (void) ztest_log_remove(zd, tx, lr, object);
+
+ dmu_tx_commit(tx);
+
+ ztest_object_unlock(zd, object);
+
+ return (0);
}
static int
-ztest_replay_remove(objset_t *os, lr_remove_t *lr, boolean_t byteswap)
+ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
{
+ objset_t *os = zd->zd_os;
+ void *data = lr + 1; /* data follows lr */
+ uint64_t offset, length;
+ ztest_block_tag_t *bt = data;
+ ztest_block_tag_t *bbt;
+ uint64_t gen, txg, lrtxg, crtxg;
+ dmu_object_info_t doi;
dmu_tx_t *tx;
- int error;
+ dmu_buf_t *db;
+ arc_buf_t *abuf = NULL;
+ rl_t *rl;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
+ offset = lr->lr_offset;
+ length = lr->lr_length;
+
+ /* If it's a dmu_sync() block, write the whole block */
+ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+ uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
+ if (length < blocksize) {
+ offset -= offset % blocksize;
+ length = blocksize;
+ }
+ }
+
+ if (bt->bt_magic == BSWAP_64(BT_MAGIC))
+ byteswap_uint64_array(bt, sizeof (*bt));
+
+ if (bt->bt_magic != BT_MAGIC)
+ bt = NULL;
+
+ ztest_object_lock(zd, lr->lr_foid, RL_READER);
+ rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+
+ dmu_object_info_from_db(db, &doi);
+
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ gen = bbt->bt_gen;
+ crtxg = bbt->bt_crtxg;
+ lrtxg = lr->lr_common.lrc_txg;
+
tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- return (error);
+
+ dmu_tx_hold_write(tx, lr->lr_foid, offset, length);
+
+ if (ztest_random(8) == 0 && length == doi.doi_data_block_size &&
+ P2PHASE(offset, length) == 0)
+ abuf = dmu_request_arcbuf(db, length);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ if (abuf != NULL)
+ dmu_return_arcbuf(abuf);
+ dmu_buf_rele(db, FTAG);
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ if (bt != NULL) {
+ /*
+ * Usually, verify the old data before writing new data --
+ * but not always, because we also want to verify correct
+ * behavior when the data was not recently read into cache.
+ */
+ ASSERT(offset % doi.doi_data_block_size == 0);
+ if (ztest_random(4) != 0) {
+ int prefetch = ztest_random(2) ?
+ DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
+ ztest_block_tag_t rbt;
+
+ VERIFY(dmu_read(os, lr->lr_foid, offset,
+ sizeof (rbt), &rbt, prefetch) == 0);
+ if (rbt.bt_magic == BT_MAGIC) {
+ ztest_bt_verify(&rbt, os, lr->lr_foid,
+ offset, gen, txg, crtxg);
+ }
+ }
+
+ /*
+ * Writes can appear to be newer than the bonus buffer because
+ * the ztest_get_data() callback does a dmu_read() of the
+ * open-context data, which may be different than the data
+ * as it was when the write was generated.
+ */
+ if (zd->zd_zilog->zl_replay) {
+ ztest_bt_verify(bt, os, lr->lr_foid, offset,
+ MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
+ bt->bt_crtxg);
+ }
+
+ /*
+ * Set the bt's gen/txg to the bonus buffer's gen/txg
+ * so that all of the usual ASSERTs will work.
+ */
+ ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
}
- error = dmu_object_free(os, lr->lr_doid, tx);
+ if (abuf == NULL) {
+ dmu_write(os, lr->lr_foid, offset, length, data, tx);
+ } else {
+ bcopy(data, abuf->b_data, length);
+ dmu_assign_arcbuf(db, offset, abuf, tx);
+ }
+
+ (void) ztest_log_write(zd, tx, lr);
+
+ dmu_buf_rele(db, FTAG);
+
dmu_tx_commit(tx);
- return (error);
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
+}
+
+static int
+ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
+{
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ uint64_t txg;
+ rl_t *rl;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ztest_object_lock(zd, lr->lr_foid, RL_READER);
+ rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
+ RL_WRITER);
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
+ lr->lr_length, tx) == 0);
+
+ (void) ztest_log_truncate(zd, tx, lr);
+
+ dmu_tx_commit(tx);
+
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
+}
+
+static int
+ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
+{
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ ztest_block_tag_t *bbt;
+ uint64_t txg, lrtxg, crtxg;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ztest_object_lock(zd, lr->lr_foid, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, lr->lr_foid);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ crtxg = bbt->bt_crtxg;
+ lrtxg = lr->lr_common.lrc_txg;
+
+ if (zd->zd_zilog->zl_replay) {
+ ASSERT(lr->lr_size != 0);
+ ASSERT(lr->lr_mode != 0);
+ ASSERT(lrtxg != 0);
+ } else {
+ /*
+ * Randomly change the size and increment the generation.
+ */
+ lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
+ sizeof (*bbt);
+ lr->lr_mode = bbt->bt_gen + 1;
+ ASSERT(lrtxg == 0);
+ }
+
+ /*
+ * Verify that the current bonus buffer is not newer than our txg.
+ */
+ ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
+ MAX(txg, lrtxg), crtxg);
+
+ dmu_buf_will_dirty(db, tx);
+
+ ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
+ ASSERT3U(lr->lr_size, <=, db->db_size);
+ VERIFY3U(dmu_set_bonus(db, lr->lr_size, tx), ==, 0);
+ bbt = ztest_bt_bonus(db);
+
+ ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
+
+ dmu_buf_rele(db, FTAG);
+
+ (void) ztest_log_setattr(zd, tx, lr);
+
+ dmu_tx_commit(tx);
+
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
}
zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
@@ -780,9 +1590,9 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_RMDIR */
NULL, /* TX_LINK */
NULL, /* TX_RENAME */
- NULL, /* TX_WRITE */
- NULL, /* TX_TRUNCATE */
- NULL, /* TX_SETATTR */
+ ztest_replay_write, /* TX_WRITE */
+ ztest_replay_truncate, /* TX_TRUNCATE */
+ ztest_replay_setattr, /* TX_SETATTR */
NULL, /* TX_ACL */
NULL, /* TX_CREATE_ACL */
NULL, /* TX_CREATE_ATTR */
@@ -794,13 +1604,477 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
};
/*
+ * ZIL get_data callbacks
+ */
+
+static void
+ztest_get_done(zgd_t *zgd, int error)
+{
+ ztest_ds_t *zd = zgd->zgd_private;
+ uint64_t object = zgd->zgd_rl->rl_object;
+
+ if (zgd->zgd_db)
+ dmu_buf_rele(zgd->zgd_db, zgd);
+
+ ztest_range_unlock(zgd->zgd_rl);
+ ztest_object_unlock(zd, object);
+
+ if (error == 0 && zgd->zgd_bp)
+ zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
+
+ umem_free(zgd, sizeof (*zgd));
+}
+
+static int
+ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
+{
+ ztest_ds_t *zd = arg;
+ objset_t *os = zd->zd_os;
+ uint64_t object = lr->lr_foid;
+ uint64_t offset = lr->lr_offset;
+ uint64_t size = lr->lr_length;
+ blkptr_t *bp = &lr->lr_blkptr;
+ uint64_t txg = lr->lr_common.lrc_txg;
+ uint64_t crtxg;
+ dmu_object_info_t doi;
+ dmu_buf_t *db;
+ zgd_t *zgd;
+ int error;
+
+ ztest_object_lock(zd, object, RL_READER);
+ error = dmu_bonus_hold(os, object, FTAG, &db);
+ if (error) {
+ ztest_object_unlock(zd, object);
+ return (error);
+ }
+
+ crtxg = ztest_bt_bonus(db)->bt_crtxg;
+
+ if (crtxg == 0 || crtxg > txg) {
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, object);
+ return (ENOENT);
+ }
+
+ dmu_object_info_from_db(db, &doi);
+ dmu_buf_rele(db, FTAG);
+ db = NULL;
+
+ zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
+ zgd->zgd_zilog = zd->zd_zilog;
+ zgd->zgd_private = zd;
+
+ if (buf != NULL) { /* immediate write */
+ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+ RL_READER);
+
+ error = dmu_read(os, object, offset, size, buf,
+ DMU_READ_NO_PREFETCH);
+ ASSERT(error == 0);
+ } else {
+ size = doi.doi_data_block_size;
+ if (ISP2(size)) {
+ offset = P2ALIGN(offset, size);
+ } else {
+ ASSERT(offset < size);
+ offset = 0;
+ }
+
+ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+ RL_READER);
+
+ error = dmu_buf_hold(os, object, offset, zgd, &db,
+ DMU_READ_NO_PREFETCH);
+
+ if (error == 0) {
+ zgd->zgd_db = db;
+ zgd->zgd_bp = bp;
+
+ ASSERT(db->db_offset == offset);
+ ASSERT(db->db_size == size);
+
+ error = dmu_sync(zio, lr->lr_common.lrc_txg,
+ ztest_get_done, zgd);
+
+ if (error == 0)
+ return (0);
+ }
+ }
+
+ ztest_get_done(zgd, error);
+
+ return (error);
+}
+
+static void *
+ztest_lr_alloc(size_t lrsize, char *name)
+{
+ char *lr;
+ size_t namesize = name ? strlen(name) + 1 : 0;
+
+ lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);
+
+ if (name)
+ bcopy(name, lr + lrsize, namesize);
+
+ return (lr);
+}
+
+void
+ztest_lr_free(void *lr, size_t lrsize, char *name)
+{
+ size_t namesize = name ? strlen(name) + 1 : 0;
+
+ umem_free(lr, lrsize + namesize);
+}
+
+/*
+ * Lookup a bunch of objects. Returns the number of objects not found.
+ */
+static int
+ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+ int missing = 0;
+ int error;
+
+ ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+
+ for (int i = 0; i < count; i++, od++) {
+ od->od_object = 0;
+ error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
+ sizeof (uint64_t), 1, &od->od_object);
+ if (error) {
+ ASSERT(error == ENOENT);
+ ASSERT(od->od_object == 0);
+ missing++;
+ } else {
+ dmu_buf_t *db;
+ ztest_block_tag_t *bbt;
+ dmu_object_info_t doi;
+
+ ASSERT(od->od_object != 0);
+ ASSERT(missing == 0); /* there should be no gaps */
+
+ ztest_object_lock(zd, od->od_object, RL_READER);
+ VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
+ od->od_object, FTAG, &db));
+ dmu_object_info_from_db(db, &doi);
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ od->od_type = doi.doi_type;
+ od->od_blocksize = doi.doi_data_block_size;
+ od->od_gen = bbt->bt_gen;
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, od->od_object);
+ }
+ }
+
+ return (missing);
+}
+
+static int
+ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+ int missing = 0;
+
+ ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+
+ for (int i = 0; i < count; i++, od++) {
+ if (missing) {
+ od->od_object = 0;
+ missing++;
+ continue;
+ }
+
+ lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
+
+ lr->lr_doid = od->od_dir;
+ lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */
+ lr->lrz_type = od->od_crtype;
+ lr->lrz_blocksize = od->od_crblocksize;
+ lr->lrz_ibshift = ztest_random_ibshift();
+ lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
+ lr->lrz_bonuslen = dmu_bonus_max();
+ lr->lr_gen = od->od_crgen;
+ lr->lr_crtime[0] = time(NULL);
+
+ if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
+ ASSERT(missing == 0);
+ od->od_object = 0;
+ missing++;
+ } else {
+ od->od_object = lr->lr_foid;
+ od->od_type = od->od_crtype;
+ od->od_blocksize = od->od_crblocksize;
+ od->od_gen = od->od_crgen;
+ ASSERT(od->od_object != 0);
+ }
+
+ ztest_lr_free(lr, sizeof (*lr), od->od_name);
+ }
+
+ return (missing);
+}
+
+static int
+ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+ int missing = 0;
+ int error;
+
+ ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+
+ od += count - 1;
+
+ for (int i = count - 1; i >= 0; i--, od--) {
+ if (missing) {
+ missing++;
+ continue;
+ }
+
+ if (od->od_object == 0)
+ continue;
+
+ lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
+
+ lr->lr_doid = od->od_dir;
+
+ if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {
+ ASSERT3U(error, ==, ENOSPC);
+ missing++;
+ } else {
+ od->od_object = 0;
+ }
+ ztest_lr_free(lr, sizeof (*lr), od->od_name);
+ }
+
+ return (missing);
+}
+
+static int
+ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
+ void *data)
+{
+ lr_write_t *lr;
+ int error;
+
+ lr = ztest_lr_alloc(sizeof (*lr) + size, NULL);
+
+ lr->lr_foid = object;
+ lr->lr_offset = offset;
+ lr->lr_length = size;
+ lr->lr_blkoff = 0;
+ BP_ZERO(&lr->lr_blkptr);
+
+ bcopy(data, lr + 1, size);
+
+ error = ztest_replay_write(zd, lr, B_FALSE);
+
+ ztest_lr_free(lr, sizeof (*lr) + size, NULL);
+
+ return (error);
+}
+
+static int
+ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
+{
+ lr_truncate_t *lr;
+ int error;
+
+ lr = ztest_lr_alloc(sizeof (*lr), NULL);
+
+ lr->lr_foid = object;
+ lr->lr_offset = offset;
+ lr->lr_length = size;
+
+ error = ztest_replay_truncate(zd, lr, B_FALSE);
+
+ ztest_lr_free(lr, sizeof (*lr), NULL);
+
+ return (error);
+}
+
+static int
+ztest_setattr(ztest_ds_t *zd, uint64_t object)
+{
+ lr_setattr_t *lr;
+ int error;
+
+ lr = ztest_lr_alloc(sizeof (*lr), NULL);
+
+ lr->lr_foid = object;
+ lr->lr_size = 0;
+ lr->lr_mode = 0;
+
+ error = ztest_replay_setattr(zd, lr, B_FALSE);
+
+ ztest_lr_free(lr, sizeof (*lr), NULL);
+
+ return (error);
+}
+
+static void
+ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
+{
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ uint64_t txg;
+ rl_t *rl;
+
+ txg_wait_synced(dmu_objset_pool(os), 0);
+
+ ztest_object_lock(zd, object, RL_READER);
+ rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_write(tx, object, offset, size);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+
+ if (txg != 0) {
+ dmu_prealloc(os, object, offset, size, tx);
+ dmu_tx_commit(tx);
+ txg_wait_synced(dmu_objset_pool(os), txg);
+ } else {
+ (void) dmu_free_long_range(os, object, offset, size);
+ }
+
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, object);
+}
+
+static void
+ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
+{
+ ztest_block_tag_t wbt;
+ dmu_object_info_t doi;
+ enum ztest_io_type io_type;
+ uint64_t blocksize;
+ void *data;
+
+ VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
+ blocksize = doi.doi_data_block_size;
+ data = umem_alloc(blocksize, UMEM_NOFAIL);
+
+ /*
+ * Pick an i/o type at random, biased toward writing block tags.
+ */
+ io_type = ztest_random(ZTEST_IO_TYPES);
+ if (ztest_random(2) == 0)
+ io_type = ZTEST_IO_WRITE_TAG;
+
+ switch (io_type) {
+
+ case ZTEST_IO_WRITE_TAG:
+ ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
+ (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
+ break;
+
+ case ZTEST_IO_WRITE_PATTERN:
+ (void) memset(data, 'a' + (object + offset) % 5, blocksize);
+ if (ztest_random(2) == 0) {
+ /*
+ * Induce fletcher2 collisions to ensure that
+ * zio_ddt_collision() detects and resolves them
+ * when using fletcher2-verify for deduplication.
+ */
+ ((uint64_t *)data)[0] ^= 1ULL << 63;
+ ((uint64_t *)data)[4] ^= 1ULL << 63;
+ }
+ (void) ztest_write(zd, object, offset, blocksize, data);
+ break;
+
+ case ZTEST_IO_WRITE_ZEROES:
+ bzero(data, blocksize);
+ (void) ztest_write(zd, object, offset, blocksize, data);
+ break;
+
+ case ZTEST_IO_TRUNCATE:
+ (void) ztest_truncate(zd, object, offset, blocksize);
+ break;
+
+ case ZTEST_IO_SETATTR:
+ (void) ztest_setattr(zd, object);
+ break;
+ }
+
+ umem_free(data, blocksize);
+}
+
+/*
+ * Initialize an object description template.
+ */
+static void
+ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
+ dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
+{
+ od->od_dir = ZTEST_DIROBJ;
+ od->od_object = 0;
+
+ od->od_crtype = type;
+ od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
+ od->od_crgen = gen;
+
+ od->od_type = DMU_OT_NONE;
+ od->od_blocksize = 0;
+ od->od_gen = 0;
+
+ (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
+ tag, (int64_t)id, index);
+}
+
+/*
+ * Lookup or create the objects for a test using the od template.
+ * If the objects do not all exist, or if 'remove' is specified,
+ * remove any existing objects and create new ones. Otherwise,
+ * use the existing objects.
+ */
+static int
+ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
+{
+ int count = size / sizeof (*od);
+ int rv = 0;
+
+ VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
+ if ((ztest_lookup(zd, od, count) != 0 || remove) &&
+ (ztest_remove(zd, od, count) != 0 ||
+ ztest_create(zd, od, count) != 0))
+ rv = -1;
+ zd->zd_od = od;
+ VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
+
+ return (rv);
+}
+
+/* ARGSUSED */
+void
+ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
+{
+ zilog_t *zilog = zd->zd_zilog;
+
+ zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
+
+ /*
+ * Remember the committed values in zd, which is in parent/child
+ * shared memory. If we die, the next iteration of ztest_run()
+ * will verify that the log really does contain this record.
+ */
+ mutex_enter(&zilog->zl_lock);
+ ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq);
+ zd->zd_seq = zilog->zl_commit_lr_seq;
+ mutex_exit(&zilog->zl_lock);
+}
+
+/*
* Verify that we can't destroy an active pool, create an existing pool,
* or create a pool with a bad vdev spec.
*/
+/* ARGSUSED */
void
-ztest_spa_create_destroy(ztest_args_t *za)
+ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
{
- int error;
+ ztest_shared_t *zs = ztest_shared;
spa_t *spa;
nvlist_t *nvroot;
@@ -808,41 +2082,31 @@ ztest_spa_create_destroy(ztest_args_t *za)
* Attempt to create using a bad file.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
- error = spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL);
+ VERIFY3U(ENOENT, ==,
+ spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
- if (error != ENOENT)
- fatal(0, "spa_create(bad_file) = %d", error);
/*
* Attempt to create using a bad mirror.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1);
- error = spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL);
+ VERIFY3U(ENOENT, ==,
+ spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
- if (error != ENOENT)
- fatal(0, "spa_create(bad_mirror) = %d", error);
/*
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
+ (void) rw_rdlock(&zs->zs_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
- error = spa_create(za->za_pool, nvroot, NULL, NULL, NULL);
+ VERIFY3U(EEXIST, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
- if (error != EEXIST)
- fatal(0, "spa_create(whatever) = %d", error);
-
- error = spa_open(za->za_pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
-
- error = spa_destroy(za->za_pool);
- if (error != EBUSY)
- fatal(0, "spa_destroy() = %d", error);
-
+ VERIFY3U(0, ==, spa_open(zs->zs_pool, &spa, FTAG));
+ VERIFY3U(EBUSY, ==, spa_destroy(zs->zs_pool));
spa_close(spa, FTAG);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+
+ (void) rw_unlock(&zs->zs_name_lock);
}
static vdev_t *
@@ -862,49 +2126,101 @@ vdev_lookup_by_path(vdev_t *vd, const char *path)
}
/*
+ * Find the first available hole which can be used as a top-level.
+ */
+int
+find_vdev_hole(spa_t *spa)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+ int c;
+
+ ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
+
+ for (c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *cvd = rvd->vdev_child[c];
+
+ if (cvd->vdev_ishole)
+ break;
+ }
+ return (c);
+}
+
+/*
* Verify that vdev_add() works as expected.
*/
+/* ARGSUSED */
void
-ztest_vdev_add_remove(ztest_args_t *za)
+ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ uint64_t leaves;
+ uint64_t guid;
nvlist_t *nvroot;
int error;
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
- ztest_shared->zs_vdev_primaries =
- spa->spa_root_vdev->vdev_children * leaves;
-
- spa_config_exit(spa, SCL_VDEV, FTAG);
+ ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
/*
- * Make 1/4 of the devices be log devices.
+ * If we have slogs then remove them 1/4 of the time.
*/
- nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
- ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
+ if (spa_has_slogs(spa) && ztest_random(4) == 0) {
+ /*
+ * Grab the guid from the head of the log class rotor.
+ */
+ guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
- error = spa_vdev_add(spa, nvroot);
- nvlist_free(nvroot);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ /*
+ * We have to grab the zs_name_lock as writer to
+ * prevent a race between removing a slog (dmu_objset_find)
+ * and destroying a dataset. Removing the slog will
+ * grab a reference on the dataset which may cause
+ * dmu_objset_destroy() to fail with EBUSY thus
+ * leaving the dataset in an inconsistent state.
+ */
+ VERIFY(rw_wrlock(&ztest_shared->zs_name_lock) == 0);
+ error = spa_vdev_remove(spa, guid, B_FALSE);
+ VERIFY(rw_unlock(&ztest_shared->zs_name_lock) == 0);
+
+ if (error && error != EEXIST)
+ fatal(0, "spa_vdev_remove() = %d", error);
+ } else {
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ /*
+ * Make 1/4 of the devices be log devices.
+ */
+ nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
+ ztest_random(4) == 0, zopt_raidz, zs->zs_mirrors, 1);
+
+ error = spa_vdev_add(spa, nvroot);
+ nvlist_free(nvroot);
+
+ if (error == ENOSPC)
+ ztest_record_enospc("spa_vdev_add");
+ else if (error != 0)
+ fatal(0, "spa_vdev_add() = %d", error);
+ }
- if (error == ENOSPC)
- ztest_record_enospc("spa_vdev_add");
- else if (error != 0)
- fatal(0, "spa_vdev_add() = %d", error);
+ VERIFY(mutex_unlock(&ztest_shared->zs_vdev_lock) == 0);
}
/*
* Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
*/
+/* ARGSUSED */
void
-ztest_vdev_aux_add_remove(ztest_args_t *za)
+ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
vdev_t *rvd = spa->spa_root_vdev;
spa_aux_vdev_t *sav;
char *aux;
@@ -919,7 +2235,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
aux = ZPOOL_CONFIG_L2CACHE;
}
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -932,12 +2248,12 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
/*
* Find an unused device we can add.
*/
- ztest_shared->zs_vdev_aux = 0;
+ zs->zs_vdev_aux = 0;
for (;;) {
char path[MAXPATHLEN];
int c;
(void) sprintf(path, ztest_aux_template, zopt_dir,
- zopt_pool, aux, ztest_shared->zs_vdev_aux);
+ zopt_pool, aux, zs->zs_vdev_aux);
for (c = 0; c < sav->sav_count; c++)
if (strcmp(sav->sav_vdevs[c]->vdev_path,
path) == 0)
@@ -945,7 +2261,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
if (c == sav->sav_count &&
vdev_lookup_by_path(rvd, path) == NULL)
break;
- ztest_shared->zs_vdev_aux++;
+ zs->zs_vdev_aux++;
}
}
@@ -968,28 +2284,126 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
* of devices that have pending state changes.
*/
if (ztest_random(2) == 0)
- (void) vdev_online(spa, guid, B_FALSE, NULL);
+ (void) vdev_online(spa, guid, 0, NULL);
error = spa_vdev_remove(spa, guid, B_FALSE);
if (error != 0 && error != EBUSY)
fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
}
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+}
+
+/*
+ * split a pool if it has mirror tlvdevs
+ */
+/* ARGSUSED */
+void
+ztest_split_pool(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
+ nvlist_t *tree, **child, *config, *split, **schild;
+ uint_t c, children, schildren = 0, lastlogid = 0;
+ int error = 0;
+
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+
+ /* ensure we have a useable config; mirrors of raidz aren't supported */
+ if (zs->zs_mirrors < 3 || zopt_raidz > 1) {
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
+ }
+
+ /* clean up the old pool, if any */
+ (void) spa_destroy("splitp");
+
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+ /* generate a config from the existing config */
+ mutex_enter(&spa->spa_props_lock);
+ VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
+ &tree) == 0);
+ mutex_exit(&spa->spa_props_lock);
+
+ VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
+ &children) == 0);
+
+ schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
+ for (c = 0; c < children; c++) {
+ vdev_t *tvd = rvd->vdev_child[c];
+ nvlist_t **mchild;
+ uint_t mchildren;
+
+ if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) {
+ VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME,
+ 0) == 0);
+ VERIFY(nvlist_add_string(schild[schildren],
+ ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0);
+ VERIFY(nvlist_add_uint64(schild[schildren],
+ ZPOOL_CONFIG_IS_HOLE, 1) == 0);
+ if (lastlogid == 0)
+ lastlogid = schildren;
+ ++schildren;
+ continue;
+ }
+ lastlogid = 0;
+ VERIFY(nvlist_lookup_nvlist_array(child[c],
+ ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
+ VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
+ }
+
+ /* OK, create a config that can be used to split */
+ VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_ROOT) == 0);
+ VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
+ lastlogid != 0 ? lastlogid : schildren) == 0);
+
+ VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
+
+ for (c = 0; c < schildren; c++)
+ nvlist_free(schild[c]);
+ free(schild);
+ nvlist_free(split);
+
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ (void) rw_wrlock(&zs->zs_name_lock);
+ error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
+ (void) rw_unlock(&zs->zs_name_lock);
+
+ nvlist_free(config);
+
+ if (error == 0) {
+ (void) printf("successful split - results:\n");
+ mutex_enter(&spa_namespace_lock);
+ show_pool_stats(spa);
+ show_pool_stats(spa_lookup("splitp"));
+ mutex_exit(&spa_namespace_lock);
+ ++zs->zs_splits;
+ --zs->zs_mirrors;
+ }
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+
}
/*
* Verify that we can attach and detach devices.
*/
+/* ARGSUSED */
void
-ztest_vdev_attach_detach(ztest_args_t *za)
+ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
spa_aux_vdev_t *sav = &spa->spa_spares;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *oldvd, *newvd, *pvd;
nvlist_t *root;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ uint64_t leaves;
uint64_t leaf, top;
uint64_t ashift = ztest_get_ashift();
uint64_t oldguid, pguid;
@@ -1001,7 +2415,8 @@ ztest_vdev_attach_detach(ztest_args_t *za)
int oldvd_is_log;
int error, expected_error;
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -1013,7 +2428,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
/*
* Pick a random top-level vdev.
*/
- top = ztest_random(rvd->vdev_children);
+ top = ztest_random_vdev_top(spa, B_TRUE);
/*
* Pick a random leaf within it.
@@ -1024,9 +2439,9 @@ ztest_vdev_attach_detach(ztest_args_t *za)
* Locate this vdev.
*/
oldvd = rvd->vdev_child[top];
- if (zopt_mirrors >= 1) {
+ if (zs->zs_mirrors >= 1) {
ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
- ASSERT(oldvd->vdev_children >= zopt_mirrors);
+ ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
oldvd = oldvd->vdev_child[leaf / zopt_raidz];
}
if (zopt_raidz > 1) {
@@ -1046,7 +2461,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
}
oldguid = oldvd->vdev_guid;
- oldsize = vdev_get_rsize(oldvd);
+ oldsize = vdev_get_min_asize(oldvd);
oldvd_is_log = oldvd->vdev_top->vdev_islog;
(void) strcpy(oldpath, oldvd->vdev_path);
pvd = oldvd->vdev_parent;
@@ -1061,7 +2476,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
if (error != 0 && error != ENODEV && error != EBUSY &&
error != ENOTSUP)
fatal(0, "detach (%s) returned %d", oldpath, error);
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
return;
}
@@ -1082,7 +2497,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
}
if (newvd) {
- newsize = vdev_get_rsize(newvd);
+ newsize = vdev_get_min_asize(newvd);
} else {
/*
* Make newsize a little bigger or smaller than oldsize.
@@ -1154,169 +2569,373 @@ ztest_vdev_attach_detach(ztest_args_t *za)
(longlong_t)newsize, replacing, error, expected_error);
}
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
}
/*
- * Verify that dynamic LUN growth works as expected.
+ * Callback function which expands the physical size of the vdev.
*/
-void
-ztest_vdev_LUN_growth(ztest_args_t *za)
+vdev_t *
+grow_vdev(vdev_t *vd, void *arg)
{
- spa_t *spa = za->za_spa;
- char dev_name[MAXPATHLEN];
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
- uint64_t vdev;
+ spa_t *spa = vd->vdev_spa;
+ size_t *newsize = arg;
size_t fsize;
int fd;
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
+ return (vd);
+
+ fsize = lseek(fd, 0, SEEK_END);
+ (void) ftruncate(fd, *newsize);
+
+ if (zopt_verbose >= 6) {
+ (void) printf("%s grew from %lu to %lu bytes\n",
+ vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize);
+ }
+ (void) close(fd);
+ return (NULL);
+}
+
+/*
+ * Callback function which expands a given vdev by calling vdev_online().
+ */
+/* ARGSUSED */
+vdev_t *
+online_vdev(vdev_t *vd, void *arg)
+{
+ spa_t *spa = vd->vdev_spa;
+ vdev_t *tvd = vd->vdev_top;
+ uint64_t guid = vd->vdev_guid;
+ uint64_t generation = spa->spa_config_generation + 1;
+ vdev_state_t newstate = VDEV_STATE_UNKNOWN;
+ int error;
+
+ ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ /* Calling vdev_online will initialize the new metaslabs */
+ spa_config_exit(spa, SCL_STATE, spa);
+ error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate);
+ spa_config_enter(spa, SCL_STATE, spa, RW_READER);
/*
- * Pick a random leaf vdev.
+ * If vdev_online returned an error or the underlying vdev_open
+ * failed then we abort the expand. The only way to know that
+ * vdev_open fails is by checking the returned newstate.
*/
- spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
- vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
- spa_config_exit(spa, SCL_VDEV, FTAG);
+ if (error || newstate != VDEV_STATE_HEALTHY) {
+ if (zopt_verbose >= 5) {
+ (void) printf("Unable to expand vdev, state %llu, "
+ "error %d\n", (u_longlong_t)newstate, error);
+ }
+ return (vd);
+ }
+ ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY);
+
+ /*
+ * Since we dropped the lock we need to ensure that we're
+ * still talking to the original vdev. It's possible this
+ * vdev may have been detached/replaced while we were
+ * trying to online it.
+ */
+ if (generation != spa->spa_config_generation) {
+ if (zopt_verbose >= 5) {
+ (void) printf("vdev configuration has changed, "
+ "guid %llu, state %llu, expected gen %llu, "
+ "got gen %llu\n",
+ (u_longlong_t)guid,
+ (u_longlong_t)tvd->vdev_state,
+ (u_longlong_t)generation,
+ (u_longlong_t)spa->spa_config_generation);
+ }
+ return (vd);
+ }
+ return (NULL);
+}
- (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
+/*
+ * Traverse the vdev tree calling the supplied function.
+ * We continue to walk the tree until we either have walked all
+ * children or we receive a non-NULL return from the callback.
+ * If a NULL callback is passed, then we just return back the first
+ * leaf vdev we encounter.
+ */
+vdev_t *
+vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
+{
+ if (vd->vdev_ops->vdev_op_leaf) {
+ if (func == NULL)
+ return (vd);
+ else
+ return (func(vd, arg));
+ }
- if ((fd = open(dev_name, O_RDWR)) != -1) {
- /*
- * Determine the size.
- */
- fsize = lseek(fd, 0, SEEK_END);
+ for (uint_t c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
+ return (cvd);
+ }
+ return (NULL);
+}
- /*
- * If it's less than 2x the original size, grow by around 3%.
- */
- if (fsize < 2 * zopt_vdev_size) {
- size_t newsize = fsize + ztest_random(fsize / 32);
- (void) ftruncate(fd, newsize);
- if (zopt_verbose >= 6) {
- (void) printf("%s grew from %lu to %lu bytes\n",
- dev_name, (ulong_t)fsize, (ulong_t)newsize);
- }
+/*
+ * Verify that dynamic LUN growth works as expected.
+ */
+/* ARGSUSED */
+void
+ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ vdev_t *vd, *tvd;
+ metaslab_class_t *mc;
+ metaslab_group_t *mg;
+ size_t psize, newsize;
+ uint64_t top;
+ uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
+
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+
+ top = ztest_random_vdev_top(spa, B_TRUE);
+
+ tvd = spa->spa_root_vdev->vdev_child[top];
+ mg = tvd->vdev_mg;
+ mc = mg->mg_class;
+ old_ms_count = tvd->vdev_ms_count;
+ old_class_space = metaslab_class_get_space(mc);
+
+ /*
+ * Determine the size of the first leaf vdev associated with
+ * our top-level device.
+ */
+ vd = vdev_walk_tree(tvd, NULL, NULL);
+ ASSERT3P(vd, !=, NULL);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ psize = vd->vdev_psize;
+
+ /*
+ * We only try to expand the vdev if it's healthy, less than 4x its
+ * original size, and it has a valid psize.
+ */
+ if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
+ psize == 0 || psize >= 4 * zopt_vdev_size) {
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
+ }
+ ASSERT(psize > 0);
+ newsize = psize + psize / 8;
+ ASSERT3U(newsize, >, psize);
+
+ if (zopt_verbose >= 6) {
+ (void) printf("Expanding LUN %s from %lu to %lu\n",
+ vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
+ }
+
+ /*
+ * Growing the vdev is a two step process:
+ * 1). expand the physical size (i.e. relabel)
+ * 2). online the vdev to create the new metaslabs
+ */
+ if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
+ vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
+ tvd->vdev_state != VDEV_STATE_HEALTHY) {
+ if (zopt_verbose >= 5) {
+ (void) printf("Could not expand LUN because "
+ "the vdev configuration changed.\n");
}
- (void) close(fd);
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
}
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ spa_config_exit(spa, SCL_STATE, spa);
+
+ /*
+ * Expanding the LUN will update the config asynchronously,
+ * thus we must wait for the async thread to complete any
+ * pending tasks before proceeding.
+ */
+ for (;;) {
+ boolean_t done;
+ mutex_enter(&spa->spa_async_lock);
+ done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks);
+ mutex_exit(&spa->spa_async_lock);
+ if (done)
+ break;
+ txg_wait_synced(spa_get_dsl(spa), 0);
+ (void) poll(NULL, 0, 100);
+ }
+
+ spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+
+ tvd = spa->spa_root_vdev->vdev_child[top];
+ new_ms_count = tvd->vdev_ms_count;
+ new_class_space = metaslab_class_get_space(mc);
+
+ if (tvd->vdev_mg != mg || mg->mg_class != mc) {
+ if (zopt_verbose >= 5) {
+ (void) printf("Could not verify LUN expansion due to "
+ "intervening vdev offline or remove.\n");
+ }
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
+ }
+
+ /*
+ * Make sure we were able to grow the vdev.
+ */
+ if (new_ms_count <= old_ms_count)
+ fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
+ old_ms_count, new_ms_count);
+
+ /*
+ * Make sure we were able to grow the pool.
+ */
+ if (new_class_space <= old_class_space)
+ fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
+ old_class_space, new_class_space);
+
+ if (zopt_verbose >= 5) {
+ char oldnumbuf[6], newnumbuf[6];
+
+ nicenum(old_class_space, oldnumbuf);
+ nicenum(new_class_space, newnumbuf);
+ (void) printf("%s grew from %s to %s\n",
+ spa->spa_name, oldnumbuf, newnumbuf);
+ }
+
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
}
+/*
+ * Verify that dmu_objset_{create,destroy,open,close} work as expected.
+ */
/* ARGSUSED */
static void
-ztest_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
+ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
{
/*
- * Create the directory object.
+ * Create the objects common to all ztest datasets.
*/
- VERIFY(dmu_object_claim(os, ZTEST_DIROBJ,
- DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE,
- DMU_OT_UINT64_OTHER, 5 * sizeof (ztest_block_tag_t), tx) == 0);
-
- VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ,
+ VERIFY(zap_create_claim(os, ZTEST_DIROBJ,
DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+}
- VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ,
- DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+static int
+ztest_dataset_create(char *dsname)
+{
+ uint64_t zilset = ztest_random(100);
+ int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0,
+ ztest_objset_create_cb, NULL);
+
+ if (err || zilset < 80)
+ return (err);
+
+ (void) printf("Setting dataset %s to sync always\n", dsname);
+ return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC,
+ ZFS_SYNC_ALWAYS, B_FALSE));
}
+/* ARGSUSED */
static int
-ztest_destroy_cb(char *name, void *arg)
+ztest_objset_destroy_cb(const char *name, void *arg)
{
- ztest_args_t *za = arg;
objset_t *os;
- dmu_object_info_t *doi = &za->za_doi;
+ dmu_object_info_t doi;
int error;
/*
* Verify that the dataset contains a directory object.
*/
- error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &os);
- ASSERT3U(error, ==, 0);
- error = dmu_object_info(os, ZTEST_DIROBJ, doi);
+ VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os));
+ error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
ASSERT3U(error, ==, 0);
- ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER);
- ASSERT3S(doi->doi_physical_blks, >=, 0);
+ ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
+ ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
- dmu_objset_close(os);
+ dmu_objset_rele(os, FTAG);
/*
* Destroy the dataset.
*/
- error = dmu_objset_destroy(name);
- if (error) {
- (void) dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &os);
- fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error);
- }
+ VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE));
return (0);
}
-/*
- * Verify that dmu_objset_{create,destroy,open,close} work as expected.
- */
-static uint64_t
-ztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode)
+static boolean_t
+ztest_snapshot_create(char *osname, uint64_t id)
{
- itx_t *itx;
- lr_create_t *lr;
- size_t namesize;
- char name[24];
-
- (void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object);
- namesize = strlen(name) + 1;
-
- itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize +
- ztest_random(ZIL_MAX_BLKSZ));
- lr = (lr_create_t *)&itx->itx_lr;
- bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr));
- lr->lr_doid = object;
- lr->lr_foid = 0;
- lr->lr_mode = mode;
- lr->lr_uid = 0;
- lr->lr_gid = 0;
- lr->lr_gen = dmu_tx_get_txg(tx);
- lr->lr_crtime[0] = time(NULL);
- lr->lr_crtime[1] = 0;
- lr->lr_rdev = 0;
- bcopy(name, (char *)(lr + 1), namesize);
-
- return (zil_itx_assign(zilog, itx, tx));
+ char snapname[MAXNAMELEN];
+ int error;
+
+ (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
+ (u_longlong_t)id);
+
+ error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1,
+ NULL, NULL, B_FALSE, B_FALSE, -1);
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ return (B_FALSE);
+ }
+ if (error != 0 && error != EEXIST)
+ fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error);
+ return (B_TRUE);
}
+static boolean_t
+ztest_snapshot_destroy(char *osname, uint64_t id)
+{
+ char snapname[MAXNAMELEN];
+ int error;
+
+ (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
+ (u_longlong_t)id);
+
+ error = dmu_objset_destroy(snapname, B_FALSE);
+ if (error != 0 && error != ENOENT)
+ fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
void
-ztest_dmu_objset_create_destroy(ztest_args_t *za)
+ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
{
+ ztest_shared_t *zs = ztest_shared;
+ ztest_ds_t zdtmp;
+ int iters;
int error;
objset_t *os, *os2;
- char name[100];
- int basemode, expected_error;
+ char name[MAXNAMELEN];
zilog_t *zilog;
- uint64_t seq;
- uint64_t objects;
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- (void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
- (u_longlong_t)za->za_instance);
+ (void) rw_rdlock(&zs->zs_name_lock);
- basemode = DS_MODE_TYPE(za->za_instance);
- if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER)
- basemode = DS_MODE_USER;
+ (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu",
+ zs->zs_pool, (u_longlong_t)id);
/*
* If this dataset exists from a previous run, process its replay log
* half of the time. If we don't replay it, then dmu_objset_destroy()
- * (invoked from ztest_destroy_cb() below) should just throw it away.
+ * (invoked from ztest_objset_destroy_cb()) should just throw it away.
*/
if (ztest_random(2) == 0 &&
- dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
- zil_replay(os, os, ztest_replay_vector);
- dmu_objset_close(os);
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
+ ztest_zd_init(&zdtmp, os);
+ zil_replay(os, &zdtmp, ztest_replay_vector);
+ ztest_zd_fini(&zdtmp);
+ dmu_objset_disown(os, FTAG);
}
/*
@@ -1324,170 +2943,152 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
* create lying around from a previous run. If so, destroy it
* and all of its snapshots.
*/
- (void) dmu_objset_find(name, ztest_destroy_cb, za,
+ (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
- error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
- if (error != ENOENT)
- fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
- name, os);
+ VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os));
/*
* Verify that we can create a new dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
- ztest_create_cb, NULL);
+ error = ztest_dataset_create(name);
if (error) {
if (error == ENOSPC) {
- ztest_record_enospc("dmu_objset_create");
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ ztest_record_enospc(FTAG);
+ (void) rw_unlock(&zs->zs_name_lock);
return;
}
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}
- error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
- if (error) {
- fatal(0, "dmu_objset_open(%s) = %d", name, error);
- }
+ VERIFY3U(0, ==,
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
+
+ ztest_zd_init(&zdtmp, os);
/*
* Open the intent log for it.
*/
- zilog = zil_open(os, NULL);
+ zilog = zil_open(os, ztest_get_data);
/*
- * Put a random number of objects in there.
+ * Put some objects in there, do a little I/O to them,
+ * and randomly take a couple of snapshots along the way.
*/
- objects = ztest_random(20);
- seq = 0;
- while (objects-- != 0) {
- uint64_t object;
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name));
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- } else {
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, object, tx);
- seq = ztest_log_create(zilog, tx, object,
- DMU_OT_UINT64_OTHER);
- dmu_write(os, object, 0, sizeof (name), name, tx);
- dmu_tx_commit(tx);
- }
- if (ztest_random(5) == 0) {
- zil_commit(zilog, seq, object);
- }
- if (ztest_random(100) == 0) {
- error = zil_suspend(zilog);
- if (error == 0) {
- zil_resume(zilog);
- }
- }
+ iters = ztest_random(5);
+ for (int i = 0; i < iters; i++) {
+ ztest_dmu_object_alloc_free(&zdtmp, id);
+ if (ztest_random(iters) == 0)
+ (void) ztest_snapshot_create(name, i);
}
/*
* Verify that we cannot create an existing dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL);
- if (error != EEXIST)
- fatal(0, "created existing dataset, error = %d", error);
+ VERIFY3U(EEXIST, ==,
+ dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
/*
- * Verify that multiple dataset holds are allowed, but only when
- * the new access mode is compatible with the base mode.
+ * Verify that we can hold an objset that is also owned.
*/
- if (basemode == DS_MODE_OWNER) {
- error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER,
- &os2);
- if (error)
- fatal(0, "dmu_objset_open('%s') = %d", name, error);
- else
- dmu_objset_close(os2);
- }
- error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2);
- expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0;
- if (error != expected_error)
- fatal(0, "dmu_objset_open('%s') = %d, expected %d",
- name, error, expected_error);
- if (error == 0)
- dmu_objset_close(os2);
+ VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2));
+ dmu_objset_rele(os2, FTAG);
- zil_close(zilog);
- dmu_objset_close(os);
+ /*
+ * Verify that we cannot own an objset that is already owned.
+ */
+ VERIFY3U(EBUSY, ==,
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
- error = dmu_objset_destroy(name);
- if (error)
- fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
+ zil_close(zilog);
+ dmu_objset_disown(os, FTAG);
+ ztest_zd_fini(&zdtmp);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_unlock(&zs->zs_name_lock);
}
/*
* Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
*/
void
-ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
+ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+
+ (void) rw_rdlock(&zs->zs_name_lock);
+ (void) ztest_snapshot_destroy(zd->zd_name, id);
+ (void) ztest_snapshot_create(zd->zd_name, id);
+ (void) rw_unlock(&zs->zs_name_lock);
+}
+
+/*
+ * Cleanup non-standard snapshots and clones.
+ */
+void
+ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
{
+ char snap1name[MAXNAMELEN];
+ char clone1name[MAXNAMELEN];
+ char snap2name[MAXNAMELEN];
+ char clone2name[MAXNAMELEN];
+ char snap3name[MAXNAMELEN];
int error;
- objset_t *os = za->za_os;
- char snapname[100];
- char osname[MAXNAMELEN];
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- dmu_objset_name(os, osname);
- (void) snprintf(snapname, 100, "%s@%llu", osname,
- (u_longlong_t)za->za_instance);
+ (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id);
+ (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id);
+ (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id);
+ (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
+ (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
- error = dmu_objset_destroy(snapname);
- if (error != 0 && error != ENOENT)
- fatal(0, "dmu_objset_destroy() = %d", error);
- error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1,
- NULL, FALSE);
- if (error == ENOSPC)
- ztest_record_enospc("dmu_take_snapshot");
- else if (error != 0 && error != EEXIST)
- fatal(0, "dmu_take_snapshot() = %d", error);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ error = dmu_objset_destroy(clone2name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
+ error = dmu_objset_destroy(snap3name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
+ error = dmu_objset_destroy(snap2name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
+ error = dmu_objset_destroy(clone1name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
+ error = dmu_objset_destroy(snap1name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
}
/*
* Verify dsl_dataset_promote handles EBUSY
*/
void
-ztest_dsl_dataset_promote_busy(ztest_args_t *za)
+ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
{
- int error;
- objset_t *os = za->za_os;
+ ztest_shared_t *zs = ztest_shared;
objset_t *clone;
dsl_dataset_t *ds;
- char snap1name[100];
- char clone1name[100];
- char snap2name[100];
- char clone2name[100];
- char snap3name[100];
- char osname[MAXNAMELEN];
- static uint64_t uniq = 0;
- uint64_t curval;
+ char snap1name[MAXNAMELEN];
+ char clone1name[MAXNAMELEN];
+ char snap2name[MAXNAMELEN];
+ char clone2name[MAXNAMELEN];
+ char snap3name[MAXNAMELEN];
+ char *osname = zd->zd_name;
+ int error;
- curval = atomic_add_64_nv(&uniq, 5) - 5;
+ (void) rw_rdlock(&zs->zs_name_lock);
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
+ ztest_dsl_dataset_cleanup(osname, id);
- dmu_objset_name(os, osname);
- (void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval++);
- (void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval++);
- (void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval++);
- (void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval++);
- (void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval++);
+ (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id);
+ (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id);
+ (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id);
+ (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
+ (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1,
- NULL, FALSE);
+ NULL, NULL, B_FALSE, B_FALSE, -1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1496,14 +3097,12 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
}
- error = dmu_objset_open(snap1name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &clone);
+ error = dmu_objset_hold(snap1name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
- error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0,
- NULL, NULL);
- dmu_objset_close(clone);
+ error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
+ dmu_objset_rele(clone, FTAG);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1513,7 +3112,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
}
error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1,
- NULL, FALSE);
+ NULL, NULL, B_FALSE, B_FALSE, -1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1523,7 +3122,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
}
error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1,
- NULL, FALSE);
+ NULL, NULL, B_FALSE, B_FALSE, -1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1532,289 +3131,73 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
}
- error = dmu_objset_open(snap3name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &clone);
+ error = dmu_objset_hold(snap3name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
- error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0,
- NULL, NULL);
- dmu_objset_close(clone);
+ error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
+ dmu_objset_rele(clone, FTAG);
if (error) {
if (error == ENOSPC) {
- ztest_record_enospc("dmu_objset_create");
+ ztest_record_enospc(FTAG);
goto out;
}
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}
- error = dsl_dataset_own(snap1name, 0, FTAG, &ds);
+ error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds);
if (error)
- fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error);
- error = dsl_dataset_promote(clone2name);
+ fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error);
+ error = dsl_dataset_promote(clone2name, NULL);
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
dsl_dataset_disown(ds, FTAG);
out:
- error = dmu_objset_destroy(clone2name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
-
- error = dmu_objset_destroy(snap3name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
+ ztest_dsl_dataset_cleanup(osname, id);
- error = dmu_objset_destroy(snap2name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
-
- error = dmu_objset_destroy(clone1name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
- error = dmu_objset_destroy(snap1name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
-
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_unlock(&zs->zs_name_lock);
}
/*
* Verify that dmu_object_{alloc,free} work as expected.
*/
void
-ztest_dmu_object_alloc_free(ztest_args_t *za)
+ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_buf_t *db;
- dmu_tx_t *tx;
- uint64_t batchobj, object, batchsize, endoff, temp;
- int b, c, error, bonuslen;
- dmu_object_info_t *doi = &za->za_doi;
- char osname[MAXNAMELEN];
-
- dmu_objset_name(os, osname);
+ ztest_od_t od[4];
+ int batchsize = sizeof (od) / sizeof (od[0]);
- endoff = -8ULL;
- batchsize = 2;
-
- /*
- * Create a batch object if necessary, and record it in the directory.
- */
- VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj, DMU_READ_PREFETCH));
- if (batchobj == 0) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create a batch object");
- dmu_tx_abort(tx);
- return;
- }
- batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, batchobj, tx);
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj, tx);
- dmu_tx_commit(tx);
- }
-
- /*
- * Destroy the previous batch of objects.
- */
- for (b = 0; b < batchsize; b++) {
- VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, DMU_READ_PREFETCH));
- if (object == 0)
- continue;
- /*
- * Read and validate contents.
- * We expect the nth byte of the bonus buffer to be n.
- */
- VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
- za->za_dbuf = db;
-
- dmu_object_info_from_db(db, doi);
- ASSERT(doi->doi_type == DMU_OT_UINT64_OTHER);
- ASSERT(doi->doi_bonus_type == DMU_OT_PLAIN_OTHER);
- ASSERT3S(doi->doi_physical_blks, >=, 0);
-
- bonuslen = doi->doi_bonus_size;
-
- for (c = 0; c < bonuslen; c++) {
- if (((uint8_t *)db->db_data)[c] !=
- (uint8_t)(c + bonuslen)) {
- fatal(0,
- "bad bonus: %s, obj %llu, off %d: %u != %u",
- osname, object, c,
- ((uint8_t *)db->db_data)[c],
- (uint8_t)(c + bonuslen));
- }
- }
-
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
-
- /*
- * We expect the word at endoff to be our object number.
- */
- VERIFY(0 == dmu_read(os, object, endoff,
- sizeof (uint64_t), &temp, DMU_READ_PREFETCH));
-
- if (temp != object) {
- fatal(0, "bad data in %s, got %llu, expected %llu",
- osname, temp, object);
- }
-
- /*
- * Destroy old object and clear batch entry.
- */
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, batchobj,
- b * sizeof (uint64_t), sizeof (uint64_t));
- dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("free object");
- dmu_tx_abort(tx);
- return;
- }
- error = dmu_object_free(os, object, tx);
- if (error) {
- fatal(0, "dmu_object_free('%s', %llu) = %d",
- osname, object, error);
- }
- object = 0;
-
- dmu_object_set_checksum(os, batchobj,
- ztest_random_checksum(), tx);
- dmu_object_set_compress(os, batchobj,
- ztest_random_compress(), tx);
-
- dmu_write(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, tx);
-
- dmu_tx_commit(tx);
- }
-
- /*
- * Before creating the new batch of objects, generate a bunch of churn.
- */
- for (b = ztest_random(100); b > 0; b--) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("churn objects");
- dmu_tx_abort(tx);
- return;
- }
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, object, tx);
- error = dmu_object_free(os, object, tx);
- if (error) {
- fatal(0, "dmu_object_free('%s', %llu) = %d",
- osname, object, error);
- }
- dmu_tx_commit(tx);
- }
+ for (int b = 0; b < batchsize; b++)
+ ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);
/*
- * Create a new batch of objects with randomly chosen
- * blocksizes and record them in the batch directory.
+ * Destroy the previous batch of objects, create a new batch,
+ * and do some I/O on the new objects.
*/
- for (b = 0; b < batchsize; b++) {
- uint32_t va_blksize;
- u_longlong_t va_nblocks;
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff,
- sizeof (uint64_t));
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create batchobj");
- dmu_tx_abort(tx);
- return;
- }
- bonuslen = (int)ztest_random(dmu_bonus_max()) + 1;
-
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_PLAIN_OTHER, bonuslen, tx);
-
- ztest_set_random_blocksize(os, object, tx);
-
- dmu_object_set_checksum(os, object,
- ztest_random_checksum(), tx);
- dmu_object_set_compress(os, object,
- ztest_random_compress(), tx);
-
- dmu_write(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, tx);
-
- /*
- * Write to both the bonus buffer and the regular data.
- */
- VERIFY(dmu_bonus_hold(os, object, FTAG, &db) == 0);
- za->za_dbuf = db;
- ASSERT3U(bonuslen, <=, db->db_size);
-
- dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
- ASSERT3S(va_nblocks, >=, 0);
-
- dmu_buf_will_dirty(db, tx);
-
- /*
- * See comments above regarding the contents of
- * the bonus buffer and the word at endoff.
- */
- for (c = 0; c < bonuslen; c++)
- ((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
-
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
-
- /*
- * Write to a large offset to increase indirection.
- */
- dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx);
+ if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0)
+ return;
- dmu_tx_commit(tx);
- }
+ while (ztest_random(4 * batchsize) != 0)
+ ztest_io(zd, od[ztest_random(batchsize)].od_object,
+ ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
}
/*
* Verify that dmu_{read,write} work as expected.
*/
-typedef struct bufwad {
- uint64_t bw_index;
- uint64_t bw_txg;
- uint64_t bw_data;
-} bufwad_t;
-
-typedef struct dmu_read_write_dir {
- uint64_t dd_packobj;
- uint64_t dd_bigobj;
- uint64_t dd_chunk;
-} dmu_read_write_dir_t;
-
void
-ztest_dmu_read_write(ztest_args_t *za)
+ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_read_write_dir_t dd;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[2];
dmu_tx_t *tx;
int i, freeit, error;
uint64_t n, s, txg;
bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
- uint64_t packoff, packsize, bigoff, bigsize;
+ uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
+ uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t);
uint64_t regions = 997;
uint64_t stride = 123456789ULL;
uint64_t width = 40;
@@ -1847,34 +3230,16 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the directory info. If it's the first time, set things up.
*/
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (dd), &dd, DMU_READ_PREFETCH));
- if (dd.dd_chunk == 0) {
- ASSERT(dd.dd_packobj == 0);
- ASSERT(dd.dd_bigobj == 0);
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create r/w directory");
- dmu_tx_abort(tx);
- return;
- }
-
- dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
- ztest_set_random_blocksize(os, dd.dd_packobj, tx);
- ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
+ return;
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
- tx);
- dmu_tx_commit(tx);
- }
+ bigobj = od[0].od_object;
+ packobj = od[1].od_object;
+ chunksize = od[0].od_gen;
+ ASSERT(chunksize == od[1].od_gen);
/*
* Prefetch a random chunk of the big object.
@@ -1884,7 +3249,7 @@ ztest_dmu_read_write(ztest_args_t *za)
*/
n = ztest_random(regions) * stride + ztest_random(width);
s = 1 + ztest_random(2 * width - 1);
- dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk);
+ dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
/*
* Pick a random index and compute the offsets into packobj and bigobj.
@@ -1895,8 +3260,8 @@ ztest_dmu_read_write(ztest_args_t *za)
packoff = n * sizeof (bufwad_t);
packsize = s * sizeof (bufwad_t);
- bigoff = n * dd.dd_chunk;
- bigsize = s * dd.dd_chunk;
+ bigoff = n * chunksize;
+ bigsize = s * chunksize;
packbuf = umem_alloc(packsize, UMEM_NOFAIL);
bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
@@ -1910,10 +3275,10 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the current contents of our objects.
*/
- error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf,
+ error = dmu_read(os, packobj, packoff, packsize, packbuf,
DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
- error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf,
+ error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
@@ -1922,24 +3287,25 @@ ztest_dmu_read_write(ztest_args_t *za)
*/
tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
+ dmu_tx_hold_write(tx, packobj, packoff, packsize);
if (freeit)
- dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize);
+ dmu_tx_hold_free(tx, bigobj, bigoff, bigsize);
else
- dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
+ dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
- error = dmu_tx_assign(tx, TXG_WAIT);
-
- if (error) {
- ztest_record_enospc("dmu r/w range");
- dmu_tx_abort(tx);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0) {
umem_free(packbuf, packsize);
umem_free(bigbuf, bigsize);
return;
}
- txg = dmu_tx_get_txg(tx);
+ dmu_object_set_checksum(os, bigobj,
+ (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx);
+
+ dmu_object_set_compress(os, bigobj,
+ (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx);
/*
* For each index from n to n + s, verify that the existing bufwad
@@ -1951,9 +3317,9 @@ ztest_dmu_read_write(ztest_args_t *za)
/* LINTED */
pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
/* LINTED */
- bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
+ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
/* LINTED */
- bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
+ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
@@ -1987,27 +3353,26 @@ ztest_dmu_read_write(ztest_args_t *za)
* We've verified all the old bufwads, and made new ones.
* Now write them out.
*/
- dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
+ dmu_write(os, packobj, packoff, packsize, packbuf, tx);
if (freeit) {
- if (zopt_verbose >= 6) {
+ if (zopt_verbose >= 7) {
(void) printf("freeing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)bigoff,
(u_longlong_t)bigsize,
(u_longlong_t)txg);
}
- VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff,
- bigsize, tx));
+ VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx));
} else {
- if (zopt_verbose >= 6) {
+ if (zopt_verbose >= 7) {
(void) printf("writing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)bigoff,
(u_longlong_t)bigsize,
(u_longlong_t)txg);
}
- dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx);
+ dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx);
}
dmu_tx_commit(tx);
@@ -2019,9 +3384,9 @@ ztest_dmu_read_write(ztest_args_t *za)
void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
- VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
+ VERIFY(0 == dmu_read(os, packobj, packoff,
packsize, packcheck, DMU_READ_PREFETCH));
- VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
+ VERIFY(0 == dmu_read(os, bigobj, bigoff,
bigsize, bigcheck, DMU_READ_PREFETCH));
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
@@ -2037,7 +3402,7 @@ ztest_dmu_read_write(ztest_args_t *za)
void
compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
- uint64_t bigsize, uint64_t n, dmu_read_write_dir_t dd, uint64_t txg)
+ uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg)
{
uint64_t i;
bufwad_t *pack;
@@ -2054,9 +3419,9 @@ compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
/* LINTED */
pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
/* LINTED */
- bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
+ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
/* LINTED */
- bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
+ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
@@ -2085,22 +3450,24 @@ compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
}
void
-ztest_dmu_read_write_zcopy(ztest_args_t *za)
+ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_read_write_dir_t dd;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[2];
dmu_tx_t *tx;
uint64_t i;
int error;
uint64_t n, s, txg;
bufwad_t *packbuf, *bigbuf;
- uint64_t packoff, packsize, bigoff, bigsize;
+ uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
+ uint64_t blocksize = ztest_random_blocksize();
+ uint64_t chunksize = blocksize;
uint64_t regions = 997;
uint64_t stride = 123456789ULL;
uint64_t width = 9;
dmu_buf_t *bonus_db;
arc_buf_t **bigbuf_arcbufs;
- dmu_object_info_t *doi = &za->za_doi;
+ dmu_object_info_t doi;
/*
* This test uses two objects, packobj and bigobj, that are always
@@ -2121,42 +3488,22 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
/*
* Read the directory info. If it's the first time, set things up.
*/
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (dd), &dd, DMU_READ_PREFETCH));
- if (dd.dd_chunk == 0) {
- ASSERT(dd.dd_packobj == 0);
- ASSERT(dd.dd_bigobj == 0);
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
- dmu_tx_hold_bo