aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Macy <mmacy@FreeBSD.org> 2020-09-18 22:55:05 +0000
committer Matt Macy <mmacy@FreeBSD.org> 2020-09-18 22:55:05 +0000
commit 04bab0082226f7e72c41ad528298c43edc3fa316 (patch)
tree 67433d716d54ef1de3f4b31a01227792c48506c2
parent b0a96e5e2d3c9480ec89dd4c034c7fe4f97abfe1 (diff)
download src-04bab0082226f7e72c41ad528298c43edc3fa316.tar.gz
src-04bab0082226f7e72c41ad528298c43edc3fa316.zip
Update openzfs to 2.0.0-rc2-g4ce06f vendor/openzfs/2.0-rc2-g4ce06f
Notes
Notes: svn path=/vendor-sys/openzfs/dist/; revision=365892 svn path=/vendor-sys/openzfs/2.0-rc2-g4ce06f/; revision=365893; tag=vendor/openzfs/2.0-rc2-g4ce06f
-rw-r--r--META2
-rw-r--r--cmd/mount_zfs/mount_zfs.c1
-rw-r--r--cmd/zdb/zdb.c8
-rw-r--r--cmd/zfs/zfs_main.c1
-rw-r--r--cmd/zgenhostid/.gitignore1
-rw-r--r--cmd/zgenhostid/Makefile.am6
-rwxr-xr-xcmd/zgenhostid/zgenhostid61
-rw-r--r--cmd/zgenhostid/zgenhostid.c152
-rw-r--r--cmd/zpool/zpool_main.c8
-rw-r--r--config/deb.am2
-rw-r--r--config/find_system_library.m45
-rw-r--r--config/rpm.am14
-rw-r--r--config/zfs-build.m47
-rw-r--r--configure.ac2
-rwxr-xr-xcontrib/dracut/90zfs/module-setup.sh.in9
-rwxr-xr-xcontrib/dracut/90zfs/parse-zfs.sh.in6
-rw-r--r--include/Makefile.am1
-rw-r--r--include/libzfs.h4
-rw-r--r--include/libzfs_core.h2
-rw-r--r--include/libzfsbootenv.h41
-rw-r--r--include/os/freebsd/spl/sys/ccompile.h5
-rw-r--r--include/os/freebsd/spl/sys/condvar.h10
-rw-r--r--include/os/freebsd/zfs/sys/Makefile.am1
-rw-r--r--include/os/freebsd/zfs/sys/zfs_bootenv_os.h29
-rw-r--r--include/os/freebsd/zfs/sys/zfs_context_os.h6
-rw-r--r--include/os/freebsd/zfs/sys/zfs_vfsops_os.h65
-rw-r--r--include/os/linux/kernel/linux/mod_compat.h2
-rw-r--r--include/os/linux/spl/sys/condvar.h7
-rw-r--r--include/os/linux/zfs/sys/Makefile.am1
-rw-r--r--include/os/linux/zfs/sys/zfs_bootenv_os.h29
-rw-r--r--include/sys/Makefile.am1
-rw-r--r--include/sys/dsl_synctask.h4
-rw-r--r--include/sys/fm/fs/zfs.h5
-rw-r--r--include/sys/fm/util.h3
-rw-r--r--include/sys/fs/zfs.h4
-rw-r--r--include/sys/spa.h6
-rw-r--r--include/sys/vdev.h3
-rw-r--r--include/sys/vdev_impl.h11
-rw-r--r--include/sys/zfs_bootenv.h53
-rw-r--r--include/sys/zfs_context.h10
-rw-r--r--include/sys/zfs_ioctl.h2
-rw-r--r--include/sys/zio.h4
-rw-r--r--lib/Makefile.am6
-rw-r--r--lib/libefi/rdwr_efi.c159
-rw-r--r--lib/libshare/os/linux/nfs.c1
-rw-r--r--lib/libspl/include/os/freebsd/sys/stat.h3
-rw-r--r--lib/libzfs/libzfs.pc.in4
-rw-r--r--lib/libzfs/libzfs_dataset.c5
-rw-r--r--lib/libzfs/libzfs_pool.c24
-rw-r--r--lib/libzfs/libzfs_sendrecv.c1
-rw-r--r--lib/libzfs/libzfs_util.c3
-rw-r--r--lib/libzfs/os/linux/libzfs_pool_os.c3
-rw-r--r--lib/libzfs_core/libzfs_core.c8
-rw-r--r--lib/libzfs_core/libzfs_core.pc.in4
-rw-r--r--lib/libzfsbootenv/.gitignore1
-rw-r--r--lib/libzfsbootenv/Makefile.am32
-rw-r--r--lib/libzfsbootenv/libzfsbootenv.pc.in12
-rw-r--r--lib/libzfsbootenv/lzbe_device.c164
-rw-r--r--lib/libzfsbootenv/lzbe_pair.c347
-rw-r--r--lib/libzfsbootenv/lzbe_util.c39
-rw-r--r--lib/libzpool/Makefile.am7
-rw-r--r--lib/libzutil/Makefile.am1
-rw-r--r--man/man5/zfs-module-parameters.561
-rw-r--r--man/man8/zfs-rename.89
-rw-r--r--man/man8/zfsprops.816
-rw-r--r--man/man8/zgenhostid.871
-rw-r--r--man/man8/zpoolprops.82
-rw-r--r--module/os/freebsd/zfs/kmod_core.c1
-rw-r--r--module/os/freebsd/zfs/sysctl_os.c1
-rw-r--r--module/os/freebsd/zfs/vdev_file.c14
-rw-r--r--module/os/freebsd/zfs/zfs_file_os.c5
-rw-r--r--module/os/freebsd/zfs/zfs_vfsops.c18
-rw-r--r--module/os/freebsd/zfs/zfs_vnops.c16
-rw-r--r--module/os/freebsd/zfs/zfs_znode.c2
-rw-r--r--module/os/linux/spl/spl-condvar.c44
-rw-r--r--module/os/linux/zfs/vdev_disk.c22
-rw-r--r--module/os/linux/zfs/vdev_file.c22
-rw-r--r--module/os/linux/zfs/zfs_acl.c2
-rw-r--r--module/os/linux/zfs/zfs_vfsops.c4
-rw-r--r--module/os/linux/zfs/zpl_super.c8
-rw-r--r--module/os/linux/zfs/zpl_xattr.c16
-rw-r--r--module/zcommon/zfs_prop.c11
-rw-r--r--module/zfs/arc.c30
-rw-r--r--module/zfs/dbuf.c2
-rw-r--r--module/zfs/dmu_redact.c33
-rw-r--r--module/zfs/dnode.c25
-rw-r--r--module/zfs/dsl_scan.c1
-rw-r--r--module/zfs/dsl_synctask.c16
-rw-r--r--module/zfs/fm.c14
-rw-r--r--module/zfs/metaslab.c1
-rw-r--r--module/zfs/mmp.c22
-rw-r--r--module/zfs/range_tree.c1
-rw-r--r--module/zfs/spa.c42
-rw-r--r--module/zfs/spa_config.c8
-rw-r--r--module/zfs/spa_history.c5
-rw-r--r--module/zfs/txg.c12
-rw-r--r--module/zfs/vdev.c103
-rw-r--r--module/zfs/vdev_indirect.c16
-rw-r--r--module/zfs/vdev_initialize.c5
-rw-r--r--module/zfs/vdev_label.c114
-rw-r--r--module/zfs/vdev_mirror.c2
-rw-r--r--module/zfs/vdev_raidz.c35
-rw-r--r--module/zfs/vdev_rebuild.c14
-rw-r--r--module/zfs/vdev_removal.c9
-rw-r--r--module/zfs/vdev_trim.c7
-rw-r--r--module/zfs/zfs_fm.c361
-rw-r--r--module/zfs/zfs_ioctl.c23
-rw-r--r--module/zfs/zio.c42
-rw-r--r--module/zfs/zthr.c20
-rw-r--r--rpm/generic/zfs.spec.in1
-rwxr-xr-xscripts/zfs-tests.sh2
-rw-r--r--tests/runfiles/common.run2
-rw-r--r--tests/zfs-tests/cmd/libzfs_input_check/Makefile.am7
-rw-r--r--tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c7
-rw-r--r--tests/zfs-tests/include/commands.cfg1
-rw-r--r--tests/zfs-tests/include/tunables.cfg2
-rwxr-xr-xtests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh4
-rwxr-xr-xtests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh4
-rwxr-xr-xtests/zfs-tests/tests/functional/acl/posix/setup.ksh2
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh25
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh10
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh10
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh2
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh2
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore1
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am14
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c174
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh155
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh10
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh10
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh10
-rw-r--r--tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg8
-rwxr-xr-xtests/zfs-tests/tests/functional/history/history_002_pos.ksh4
-rwxr-xr-xtests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh2
134 files changed, 2671 insertions, 524 deletions
diff --git a/META b/META
index 4660386a656f..ab33031b2e2a 100644
--- a/META
+++ b/META
@@ -2,7 +2,7 @@ Meta: 1
Name: zfs
Branch: 1.0
Version: 2.0.0
-Release: rc1
+Release: rc2
Release-Tags: relext
License: CDDL
Author: OpenZFS
diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c
index 87d2ccadcded..ed9f167ccac8 100644
--- a/cmd/mount_zfs/mount_zfs.c
+++ b/cmd/mount_zfs/mount_zfs.c
@@ -182,6 +182,7 @@ main(int argc, char **argv)
int error, c;
(void) setlocale(LC_ALL, "");
+ (void) setlocale(LC_NUMERIC, "C");
(void) textdomain(TEXT_DOMAIN);
opterr = 0;
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index e7211711a41c..c070a1f8c4da 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -5340,11 +5340,6 @@ load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme,
if (txg < metaslab_unflushed_txg(ms))
return (0);
- vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
- ASSERT(vim != NULL);
- if (offset >= vdev_indirect_mapping_max_offset(vim))
- return (0);
-
if (sme->sme_type == SM_ALLOC)
range_tree_add(svr->svr_allocd_segs, offset, size);
else
@@ -5407,9 +5402,6 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
metaslab_t *msp = vd->vdev_ms[msi];
- if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
- break;
-
ASSERT0(range_tree_space(allocs));
if (msp->ms_sm != NULL)
VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC));
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 1a113c5c0382..20579157d7d6 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -8468,6 +8468,7 @@ main(int argc, char **argv)
char **newargv;
(void) setlocale(LC_ALL, "");
+ (void) setlocale(LC_NUMERIC, "C");
(void) textdomain(TEXT_DOMAIN);
opterr = 0;
diff --git a/cmd/zgenhostid/.gitignore b/cmd/zgenhostid/.gitignore
new file mode 100644
index 000000000000..072246c735ba
--- /dev/null
+++ b/cmd/zgenhostid/.gitignore
@@ -0,0 +1 @@
+/zgenhostid
diff --git a/cmd/zgenhostid/Makefile.am b/cmd/zgenhostid/Makefile.am
index 69c99ca9d828..0ba791f7cde0 100644
--- a/cmd/zgenhostid/Makefile.am
+++ b/cmd/zgenhostid/Makefile.am
@@ -1 +1,5 @@
-dist_bin_SCRIPTS = zgenhostid
+include $(top_srcdir)/config/Rules.am
+
+bin_PROGRAMS = zgenhostid
+
+zgenhostid_SOURCES = zgenhostid.c
diff --git a/cmd/zgenhostid/zgenhostid b/cmd/zgenhostid/zgenhostid
deleted file mode 100755
index 8b468740c72b..000000000000
--- a/cmd/zgenhostid/zgenhostid
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env bash
-
-# Emulate genhostid(1) available on RHEL/CENTOS, for use on distros
-# which do not provide that utility.
-#
-# Usage:
-# zgenhostid
-# zgenhostid <value>
-#
-# If /etc/hostid already exists and is size > 0, the script exits immediately
-# and changes nothing. Unlike genhostid, this generates an error message.
-#
-# The first form generates a random hostid and stores it in /etc/hostid.
-# The second form checks that the provided value is between 0x1 and 0xFFFFFFFF
-# and if so, stores it in /etc/hostid. This form is not supported by
-# genhostid(1).
-
-hostid_file=/etc/hostid
-
-function usage {
- echo "$0 [value]"
- echo "If $hostid_file is not present, store a hostid in it." >&2
- echo "The optional value must be an 8-digit hex number between" >&2
- echo "1 and 2^32-1. If no value is provided, a random one will" >&2
- echo "be generated. The value must be unique among your systems." >&2
-}
-
-# hostid(1) ignores contents of /etc/hostid if size < 4 bytes. It would
-# be better if this checked size >= 4 bytes but it the method must be
-# widely portable.
-if [ -s $hostid_file ]; then
- echo "$hostid_file already exists. No change made." >&2
- exit 1
-fi
-
-if [ -n "$1" ]; then
- host_id=$1
-else
- # $RANDOM goes from 0..32k-1
- number=$((((RANDOM % 4) * 32768 + RANDOM) * 32768 + RANDOM))
- host_id=$(printf "%08x" $number)
-fi
-
-if egrep -o '^0{8}$' <<< $host_id >/dev/null 2>&1; then
- usage
- exit 2
-fi
-
-if ! egrep -o '^[a-fA-F0-9]{8}$' <<< $host_id >/dev/null 2>&1; then
- usage
- exit 3
-fi
-
-a=${host_id:6:2}
-b=${host_id:4:2}
-c=${host_id:2:2}
-d=${host_id:0:2}
-
-echo -ne \\x$a\\x$b\\x$c\\x$d > $hostid_file
-
-exit 0
diff --git a/cmd/zgenhostid/zgenhostid.c b/cmd/zgenhostid/zgenhostid.c
new file mode 100644
index 000000000000..562262928c77
--- /dev/null
+++ b/cmd/zgenhostid/zgenhostid.c
@@ -0,0 +1,152 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020, Georgy Yakovlev. All rights reserved.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
+
+static void usage(void);
+
+/* Print the usage text to stderr and exit with EXIT_FAILURE; never returns. */
+static void
+usage(void)
+{
+	(void) fprintf(stderr,
+	    "usage: zgenhostid [-fh] [-o path] [value]\n\n"
+	    " -f\t\t force hostid file write\n"
+	    " -h\t\t print this usage and exit\n"
+	    " -o <filename>\t write hostid to this file\n\n"
+	    "If hostid file is not present, store a hostid in it.\n"
+	    "The optional value must be an 8-digit hex number between "
+	    "1 and 2^32-1.\n"
+	    "If no value is provided, a random one will "
+	    "be generated.\n"
+	    "The value must be unique among your systems.\n");
+	exit(EXIT_FAILURE);
+}
+
+/* Parse options, validate or generate a hostid, write it to the file. */
+int
+main(int argc, char **argv)
+{
+	/* default file path, can be optionally set by user */
+	char path[PATH_MAX] = "/etc/hostid";
+	/* holds converted user input or lrand48() generated value */
+	unsigned long input_i = 0;
+
+	int opt, pathlen;
+	int force_fwrite = 0;
+	while ((opt = getopt_long(argc, argv, "fo:h?", 0, 0)) != -1) {
+		switch (opt) {
+		case 'f':
+			force_fwrite = 1;
+			break;
+		case 'o':
+			pathlen = snprintf(path, sizeof (path), "%s", optarg);
+			if (pathlen < 1) {
+				fprintf(stderr, "%s\n", strerror(EINVAL));
+				exit(EXIT_FAILURE);
+			} else if ((size_t)pathlen >= sizeof (path)) {
+				fprintf(stderr, "%s\n", strerror(EOVERFLOW));
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'h':
+		case '?':
+			usage();
+		}
+	}
+
+	char *in_s = argv[optind];
+	if (in_s != NULL) {
+		/* increment pointer by 2 if string is 0x prefixed */
+		if (strncasecmp("0x", in_s, 2) == 0)
+			in_s += 2;
+
+		/* need to be exactly 8 characters */
+		const char *hex = "0123456789abcdefABCDEF";
+		if (strlen(in_s) != 8 || strspn(in_s, hex) != 8) {
+			fprintf(stderr, "%s\n", strerror(ERANGE));
+			usage();
+		}
+
+		/* strtoul() signals errors only via errno; clear it first */
+		errno = 0;
+		input_i = strtoul(in_s, NULL, 16);
+		if (errno != 0) {
+			perror("strtoul");
+			exit(EXIT_FAILURE);
+		}
+
+		if (input_i < 0x1 || input_i > UINT32_MAX) {
+			fprintf(stderr, "%s\n", strerror(ERANGE));
+			usage();
+		}
+	}
+
+	struct stat st;
+	if (force_fwrite == 0 && stat(path, &st) == 0 &&
+	    S_ISREG(st.st_mode)) {
+		fprintf(stderr, "%s: %s\n", path, strerror(EEXIST));
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * generate if not provided by user; seed once so that a retry after
+	 * the unlikely zero return from lrand48() yields a different value
+	 */
+	srand48(getpid() ^ time(NULL));
+	while (input_i == 0)
+		input_i = lrand48();
+
+	FILE *fp = fopen(path, "wb");
+	if (!fp) {
+		perror("fopen");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * we need just 4 bytes in native endianness
+	 * not using sethostid() because it may be missing or just a stub
+	 */
+	uint32_t hostid = input_i;
+	/* stdio may buffer the bytes until fclose(), so check both calls */
+	if (fwrite(&hostid, 1, sizeof (hostid), fp) != sizeof (hostid) ||
+	    fclose(fp) != 0) {
+		perror("fwrite");
+		exit(EXIT_FAILURE);
+	}
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index adbb78a8effd..f612db48d4f9 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -2654,6 +2654,13 @@ show_import(nvlist_t *config)
errata);
break;
+ case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
+ printf_color(ANSI_BOLD, gettext("status: "));
+ printf_color(ANSI_YELLOW, gettext("One or more devices are "
+ "configured to use a non-native block size.\n"
+ "\tExpect reduced performance.\n"));
+ break;
+
default:
/*
* No other status can be seen when importing pools.
@@ -10229,6 +10236,7 @@ main(int argc, char **argv)
char **newargv;
(void) setlocale(LC_ALL, "");
+ (void) setlocale(LC_NUMERIC, "C");
(void) textdomain(TEXT_DOMAIN);
srand(time(NULL));
diff --git a/config/deb.am b/config/deb.am
index 88679545a594..79063e407fe3 100644
--- a/config/deb.am
+++ b/config/deb.am
@@ -35,7 +35,7 @@ deb-dkms: deb-local rpm-dkms
fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \
$(RM) $$pkg1
-deb-utils: deb-local rpm-utils
+deb-utils: deb-local rpm-utils-initramfs
name=${PACKAGE}; \
version=${VERSION}-${RELEASE}; \
arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
diff --git a/config/find_system_library.m4 b/config/find_system_library.m4
index 9a95d6a15033..310b44112aea 100644
--- a/config/find_system_library.m4
+++ b/config/find_system_library.m4
@@ -11,10 +11,12 @@ AC_DEFUN([ZFS_AC_FIND_SYSTEM_LIBRARY], [
_header_found=
_library_found=
+ _pc_found=
AS_IF([test -n "$2"], [PKG_CHECK_MODULES([$1], [$2], [
_header_found=1
_library_found=1
+ _pc_found=1
], [:])])
# set _header_found/_library_found if the user passed in CFLAGS/LIBS
@@ -82,6 +84,9 @@ AC_DEFUN([ZFS_AC_FIND_SYSTEM_LIBRARY], [
AS_IF([test "x$_header_found" = "x1" && test "x$_library_found" = "x1"], [
AC_SUBST([$1]_CFLAGS)
AC_SUBST([$1]_LIBS)
+ AS_IF([test "x$_pc_found" = "x1"], [
+ AC_SUBST([$1]_PC, [$2])
+ ])
AC_DEFINE([HAVE_][$1], [1], [Define if you have [$5]])
$7
],[dnl ELSE
diff --git a/config/rpm.am b/config/rpm.am
index 9dd69ade333e..13bd54a625b0 100644
--- a/config/rpm.am
+++ b/config/rpm.am
@@ -7,7 +7,7 @@
###############################################################################
PHONY += srpm srpms srpm-kmod srpm-dkms srpm-utils
-PHONY += rpm rpms rpm-kmod rpm-dkms rpm-utils
+PHONY += rpm rpms rpm-kmod rpm-dkms rpm-utils rpm-utils-initramfs
PHONY += srpm-common rpm-common rpm-local
srpm-kmod srpm-dkms srpm-utils: dist
@@ -35,10 +35,22 @@ rpm-dkms: srpm-dkms
$(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}-dkms" \
def='${RPM_DEFINE_COMMON} ${RPM_DEFINE_DKMS}' rpm-common
+# The rpm-utils and rpm-utils-initramfs targets are identical except for the
+# zfs-initramfs package: rpm-utils never includes it, rpm-utils-initramfs
+# includes it if detected at configure time. The zfs-initramfs package does
+# not work on any known RPM-based distribution and the resulting RPM is only
+# used to create a Debian package. The rpm-utils-initramfs target is not
+# intended to be specified by the user directly, it is provided as a
+# dependency of the deb-utils target.
+
rpm-utils: srpm-utils
$(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}" \
def='${RPM_DEFINE_COMMON} ${RPM_DEFINE_UTIL}' rpm-common
+rpm-utils-initramfs: srpm-utils
+ $(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}" \
+ def='${RPM_DEFINE_COMMON} ${RPM_DEFINE_UTIL} ${RPM_DEFINE_INITRAMFS}' rpm-common
+
rpm: rpm-kmod rpm-dkms rpm-utils
rpms: rpm-kmod rpm-dkms rpm-utils
diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
index 2f0aca830598..7754eda3f6a2 100644
--- a/config/zfs-build.m4
+++ b/config/zfs-build.m4
@@ -282,7 +282,6 @@ AC_DEFUN([ZFS_AC_RPM], [
AS_IF([test -n "$udevruledir" ], [
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"'
])
- RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_INITRAMFS)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)'
@@ -542,13 +541,13 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
AC_MSG_CHECKING([whether initramfs-tools is available])
if test -d /usr/share/initramfs-tools ; then
- DEFINE_INITRAMFS='--define "_initramfs 1"'
+ RPM_DEFINE_INITRAMFS='--define "_initramfs 1"'
AC_MSG_RESULT([yes])
else
- DEFINE_INITRAMFS=''
+ RPM_DEFINE_INITRAMFS=''
AC_MSG_RESULT([no])
fi
- AC_SUBST(DEFINE_INITRAMFS)
+ AC_SUBST(RPM_DEFINE_INITRAMFS)
])
dnl #
diff --git a/configure.ac b/configure.ac
index 199187ce51bb..f149ab6d1b83 100644
--- a/configure.ac
+++ b/configure.ac
@@ -161,6 +161,8 @@ AC_CONFIG_FILES([
lib/libuutil/Makefile
lib/libzfs/Makefile
lib/libzfs/libzfs.pc
+ lib/libzfsbootenv/Makefile
+ lib/libzfsbootenv/libzfsbootenv.pc
lib/libzfs_core/Makefile
lib/libzfs_core/libzfs_core.pc
lib/libzpool/Makefile
diff --git a/contrib/dracut/90zfs/module-setup.sh.in b/contrib/dracut/90zfs/module-setup.sh.in
index 7e7a96d6e0a9..5b746049fb23 100755
--- a/contrib/dracut/90zfs/module-setup.sh.in
+++ b/contrib/dracut/90zfs/module-setup.sh.in
@@ -5,7 +5,7 @@ check() {
[ "${1}" = "-d" ] && return 0
# Verify the zfs tool chain
- for tool in "@sbindir@/zpool" "@sbindir@/zfs" "@mounthelperdir@/mount.zfs" ; do
+ for tool in "@bindir@/zgenhostid" "@sbindir@/zpool" "@sbindir@/zfs" "@mounthelperdir@/mount.zfs" ; do
test -x "$tool" || return 1
done
# Verify grep exists
@@ -38,6 +38,7 @@ install() {
inst_rules @udevruledir@/60-zvol.rules
dracut_install hostid
dracut_install grep
+ dracut_install @bindir@/zgenhostid
dracut_install @sbindir@/zfs
dracut_install @sbindir@/zpool
# Workaround for zfsonlinux/zfs#4749 by ensuring libgcc_s.so(.1) is included
@@ -83,11 +84,7 @@ install() {
fi
# Synchronize initramfs and system hostid
- AA=`hostid | cut -b 1,2`
- BB=`hostid | cut -b 3,4`
- CC=`hostid | cut -b 5,6`
- DD=`hostid | cut -b 7,8`
- echo -ne "\\x${DD}\\x${CC}\\x${BB}\\x${AA}" > "${initdir}/etc/hostid"
+ zgenhostid -o "${initdir}/etc/hostid" "$(hostid)"
if dracut_module_included "systemd"; then
mkdir -p "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"
diff --git a/contrib/dracut/90zfs/parse-zfs.sh.in b/contrib/dracut/90zfs/parse-zfs.sh.in
index 6a805ae24a5a..2ff76d8fa080 100755
--- a/contrib/dracut/90zfs/parse-zfs.sh.in
+++ b/contrib/dracut/90zfs/parse-zfs.sh.in
@@ -6,11 +6,7 @@
spl_hostid=$(getarg spl_hostid=)
if [ -n "${spl_hostid}" ] ; then
info "ZFS: Using hostid from command line: ${spl_hostid}"
- AA=$(echo "${spl_hostid}" | cut -b 1,2)
- BB=$(echo "${spl_hostid}" | cut -b 3,4)
- CC=$(echo "${spl_hostid}" | cut -b 5,6)
- DD=$(echo "${spl_hostid}" | cut -b 7,8)
- echo -ne "\\x${DD}\\x${CC}\\x${BB}\\x${AA}" >/etc/hostid
+ zgenhostid -f "${spl_hostid}"
elif [ -f "/etc/hostid" ] ; then
info "ZFS: Using hostid from /etc/hostid: $(hostid)"
else
diff --git a/include/Makefile.am b/include/Makefile.am
index 0e997deaf173..17286ecbb7fd 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -15,6 +15,7 @@ USER_H = \
libuutil.h \
libuutil_impl.h \
libzfs.h \
+ libzfsbootenv.h \
libzfs_core.h \
libzfs_impl.h \
libzutil.h \
diff --git a/include/libzfs.h b/include/libzfs.h
index 6b4f518a4a86..e0b2676a441f 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -892,8 +892,8 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
* Label manipulation.
*/
extern int zpool_clear_label(int);
-extern int zpool_set_bootenv(zpool_handle_t *, const char *);
-extern int zpool_get_bootenv(zpool_handle_t *, char *, size_t, off_t);
+extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *);
+extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **);
/*
* Management interfaces for SMB ACL files
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index e69fe32cd0a1..34161a06fb45 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -135,7 +135,7 @@ int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
-int lzc_set_bootenv(const char *, const char *);
+int lzc_set_bootenv(const char *, const nvlist_t *);
int lzc_get_bootenv(const char *, nvlist_t **);
#ifdef __cplusplus
}
diff --git a/include/libzfsbootenv.h b/include/libzfsbootenv.h
new file mode 100644
index 000000000000..b078b605db7f
--- /dev/null
+++ b/include/libzfsbootenv.h
@@ -0,0 +1,41 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _LIBZFSBOOTENV_H
+#define _LIBZFSBOOTENV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum lzbe_flags {
+ lzbe_add, /* add data to existing nvlist */
+ lzbe_replace /* replace current nvlist */
+} lzbe_flags_t;
+
+extern int lzbe_nvlist_get(const char *, const char *, void **);
+extern int lzbe_nvlist_set(const char *, const char *, void *);
+extern void lzbe_nvlist_free(void *);
+extern int lzbe_add_pair(void *, const char *, const char *, void *, size_t);
+extern int lzbe_remove_pair(void *, const char *);
+extern int lzbe_set_boot_device(const char *, lzbe_flags_t, const char *);
+extern int lzbe_get_boot_device(const char *, char **);
+extern int lzbe_bootenv_print(const char *, const char *, FILE *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBZFSBOOTENV_H */
diff --git a/include/os/freebsd/spl/sys/ccompile.h b/include/os/freebsd/spl/sys/ccompile.h
index 7268bd1d73a6..bffe6d21e31d 100644
--- a/include/os/freebsd/spl/sys/ccompile.h
+++ b/include/os/freebsd/spl/sys/ccompile.h
@@ -206,8 +206,10 @@ typedef int enum_t;
#define __XSI_VISIBLE 1000
#endif
#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
-#define open64 open
#define mmap64 mmap
+/* Note: this file can be used on linux/macOS when bootstrapping tools. */
+#if defined(__FreeBSD__)
+#define open64 open
#define pwrite64 pwrite
#define ftruncate64 ftruncate
#define lseek64 lseek
@@ -217,6 +219,7 @@ typedef int enum_t;
#define statfs64 statfs
#define readdir64 readdir
#define dirent64 dirent
+#endif
#define P2ALIGN(x, align) ((x) & -(align))
#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
#define P2ROUNDUP(x, align) ((((x) - 1) | ((align) - 1)) + 1)
diff --git a/include/os/freebsd/spl/sys/condvar.h b/include/os/freebsd/spl/sys/condvar.h
index a42995793bc2..a634ab6b6807 100644
--- a/include/os/freebsd/spl/sys/condvar.h
+++ b/include/os/freebsd/spl/sys/condvar.h
@@ -142,8 +142,14 @@ cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t timo)
return (1);
}
-#define cv_timedwait_io cv_timedwait
-#define cv_timedwait_sig_io cv_timedwait_sig
+#define cv_timedwait_io cv_timedwait
+#define cv_timedwait_idle cv_timedwait
+#define cv_timedwait_sig_io cv_timedwait_sig
+#define cv_wait_io cv_wait
+#define cv_wait_io_sig cv_wait_sig
+#define cv_wait_idle cv_wait
+#define cv_timedwait_io_hires cv_timedwait_hires
+#define cv_timedwait_idle_hires cv_timedwait_hires
static inline int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
diff --git a/include/os/freebsd/zfs/sys/Makefile.am b/include/os/freebsd/zfs/sys/Makefile.am
index 6a65a7326066..bf5cc39eba74 100644
--- a/include/os/freebsd/zfs/sys/Makefile.am
+++ b/include/os/freebsd/zfs/sys/Makefile.am
@@ -2,6 +2,7 @@ KERNEL_H = \
freebsd_crypto.h \
sha2.h \
vdev_os.h \
+ zfs_bootenv_os.h \
zfs_context_os.h \
zfs_ctldir.h \
zfs_dir.h \
diff --git a/include/os/freebsd/zfs/sys/zfs_bootenv_os.h b/include/os/freebsd/zfs/sys/zfs_bootenv_os.h
new file mode 100644
index 000000000000..80c71a6c506d
--- /dev/null
+++ b/include/os/freebsd/zfs/sys/zfs_bootenv_os.h
@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _ZFS_BOOTENV_OS_H
+#define _ZFS_BOOTENV_OS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BOOTENV_OS BE_FREEBSD_VENDOR
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_BOOTENV_OS_H */
diff --git a/include/os/freebsd/zfs/sys/zfs_context_os.h b/include/os/freebsd/zfs/sys/zfs_context_os.h
index 0a2f0bfaaa70..0316f93b27ec 100644
--- a/include/os/freebsd/zfs/sys/zfs_context_os.h
+++ b/include/os/freebsd/zfs/sys/zfs_context_os.h
@@ -41,9 +41,6 @@
#include <sys/ccompat.h>
#include <linux/types.h>
-#define cv_wait_io(cv, mp) cv_wait(cv, mp)
-#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp)
-
#define cond_resched() kern_yield(PRI_USER)
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
@@ -75,7 +72,7 @@ extern struct mtx zfs_debug_mtx;
} \
} while (0)
-#define MSEC_TO_TICK(msec) ((msec) / (MILLISEC / hz))
+#define MSEC_TO_TICK(msec) (howmany((hrtime_t)(msec) * hz, MILLISEC))
extern int hz;
extern int tick;
typedef int fstrans_cookie_t;
@@ -84,7 +81,6 @@ typedef int fstrans_cookie_t;
#define signal_pending(x) SIGPENDING(x)
#define current curthread
#define thread_join(x)
-#define cv_wait_io(cv, mp) cv_wait(cv, mp)
typedef struct opensolaris_utsname utsname_t;
extern utsname_t *utsname(void);
extern int spa_import_rootpool(const char *name, bool checkpointrewind);
diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index 1b80ee7cb177..e816e393378a 100644
--- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -27,18 +27,31 @@
#ifndef _SYS_FS_ZFS_VFSOPS_H
#define _SYS_FS_ZFS_VFSOPS_H
+#if __FreeBSD_version >= 1300109
+#define TEARDOWN_INACTIVE_RMS
+#endif
+
#include <sys/dataset_kstats.h>
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
#include <sys/sa.h>
#include <sys/rrwlock.h>
+#ifdef TEARDOWN_INACTIVE_RMS
+#include <sys/rmlock.h>
+#endif
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
#endif
+#ifdef TEARDOWN_INACTIVE_RMS
+typedef struct rmslock zfs_teardown_lock_t;
+#else
+#define zfs_teardown_lock_t krwlock_t
+#endif
+
typedef struct zfsvfs zfsvfs_t;
struct znode;
@@ -67,7 +80,7 @@ struct zfsvfs {
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_unmounted; /* unmounted */
rrmlock_t z_teardown_lock;
- krwlock_t z_teardown_inactive_lock;
+ zfs_teardown_lock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
@@ -98,6 +111,56 @@ struct zfsvfs {
struct task z_unlinked_drain_task;
};
+#ifdef TEARDOWN_INACTIVE_RMS
+#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_init(&(zfsvfs)->z_teardown_inactive_lock, "zfs teardown inactive")
+
+#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_destroy(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_try_rlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_rlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_runlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_wlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rms_wunlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \
+ rms_wowned(&(zfsvfs)->z_teardown_inactive_lock)
+#else
+#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_init(&(zfsvfs)->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL)
+
+#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_destroy(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_tryenter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
+
+#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
+
+#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_WRITER)
+
+#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \
+ rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \
+ RW_WRITE_HELD(&(zfsvfs)->z_teardown_inactive_lock)
+#endif
+
#define ZSB_XATTR 0x0001 /* Enable user xattrs */
/*
* Normal filesystems (those not under .zfs/snapshot) have a total
diff --git a/include/os/linux/kernel/linux/mod_compat.h b/include/os/linux/kernel/linux/mod_compat.h
index 4b83fe413334..1c48df5cbd81 100644
--- a/include/os/linux/kernel/linux/mod_compat.h
+++ b/include/os/linux/kernel/linux/mod_compat.h
@@ -21,6 +21,7 @@
/*
* Copyright (C) 2016 Gvozden Neskovic <neskovic@gmail.com>.
+ * Copyright (c) 2020 by Delphix. All rights reserved.
*/
#ifndef _MOD_COMPAT_H
@@ -71,6 +72,7 @@ enum scope_prefix_types {
zfs_txg,
zfs_vdev,
zfs_vdev_cache,
+ zfs_vdev_file,
zfs_vdev_mirror,
zfs_zevent,
zfs_zio,
diff --git a/include/os/linux/spl/sys/condvar.h b/include/os/linux/spl/sys/condvar.h
index 22408824f85b..fa321403bf74 100644
--- a/include/os/linux/spl/sys/condvar.h
+++ b/include/os/linux/spl/sys/condvar.h
@@ -80,15 +80,19 @@ extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *);
extern void __cv_destroy(kcondvar_t *);
extern void __cv_wait(kcondvar_t *, kmutex_t *);
extern void __cv_wait_io(kcondvar_t *, kmutex_t *);
+extern void __cv_wait_idle(kcondvar_t *, kmutex_t *);
extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *);
extern int __cv_wait_sig(kcondvar_t *, kmutex_t *);
extern int __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
extern int __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
extern int __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
+extern int __cv_timedwait_idle(kcondvar_t *, kmutex_t *, clock_t);
extern int cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
extern int cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
+extern int cv_timedwait_idle_hires(kcondvar_t *, kmutex_t *, hrtime_t,
+ hrtime_t res, int flag);
extern void __cv_signal(kcondvar_t *);
extern void __cv_broadcast(kcondvar_t *c);
@@ -96,6 +100,7 @@ extern void __cv_broadcast(kcondvar_t *c);
#define cv_destroy(cvp) __cv_destroy(cvp)
#define cv_wait(cvp, mp) __cv_wait(cvp, mp)
#define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp)
+#define cv_wait_idle(cvp, mp) __cv_wait_idle(cvp, mp)
#define cv_wait_io_sig(cvp, mp) __cv_wait_io_sig(cvp, mp)
#define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp)
#define cv_signal(cvp) __cv_signal(cvp)
@@ -109,5 +114,7 @@ extern void __cv_broadcast(kcondvar_t *c);
#define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t)
#define cv_timedwait_io(cvp, mp, t) __cv_timedwait_io(cvp, mp, t)
#define cv_timedwait_sig(cvp, mp, t) __cv_timedwait_sig(cvp, mp, t)
+#define cv_timedwait_idle(cvp, mp, t) __cv_timedwait_idle(cvp, mp, t)
+
#endif /* _SPL_CONDVAR_H */
diff --git a/include/os/linux/zfs/sys/Makefile.am b/include/os/linux/zfs/sys/Makefile.am
index b56e6771d28a..a5f2502d20e8 100644
--- a/include/os/linux/zfs/sys/Makefile.am
+++ b/include/os/linux/zfs/sys/Makefile.am
@@ -16,6 +16,7 @@ KERNEL_H = \
trace_zil.h \
trace_zio.h \
trace_zrlock.h \
+ zfs_bootenv_os.h \
zfs_context_os.h \
zfs_ctldir.h \
zfs_dir.h \
diff --git a/include/os/linux/zfs/sys/zfs_bootenv_os.h b/include/os/linux/zfs/sys/zfs_bootenv_os.h
new file mode 100644
index 000000000000..7b2f083adcd4
--- /dev/null
+++ b/include/os/linux/zfs/sys/zfs_bootenv_os.h
@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _ZFS_BOOTENV_OS_H
+#define _ZFS_BOOTENV_OS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BOOTENV_OS BE_LINUX_VENDOR
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_BOOTENV_OS_H */
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index 75727b93aacd..a944c5ea834d 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -102,6 +102,7 @@ COMMON_H = \
zcp_set.h \
zfeature.h \
zfs_acl.h \
+ zfs_bootenv.h \
zfs_context.h \
zfs_debug.h \
zfs_delay.h \
diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h
index 957963ffe553..0bb602e8f7ff 100644
--- a/include/sys/dsl_synctask.h
+++ b/include/sys/dsl_synctask.h
@@ -112,11 +112,11 @@ void dsl_sync_task_sync(dsl_sync_task_t *, dmu_tx_t *);
int dsl_sync_task(const char *, dsl_checkfunc_t *,
dsl_syncfunc_t *, void *, int, zfs_space_check_t);
void dsl_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *,
- void *, int, zfs_space_check_t, dmu_tx_t *);
+ void *, dmu_tx_t *);
int dsl_early_sync_task(const char *, dsl_checkfunc_t *,
dsl_syncfunc_t *, void *, int, zfs_space_check_t);
void dsl_early_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *,
- void *, int, zfs_space_check_t, dmu_tx_t *);
+ void *, dmu_tx_t *);
int dsl_sync_task_sig(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *,
dsl_sigfunc_t *, void *, int, zfs_space_check_t);
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index 9bfb123c76fe..6491606d328b 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
#ifndef _SYS_FM_FS_ZFS_H
#define _SYS_FM_FS_ZFS_H
@@ -88,6 +92,7 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS "zio_flags"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage"
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY "zio_priority"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
diff --git a/include/sys/fm/util.h b/include/sys/fm/util.h
index ff54b05bb6af..ea8c61a8b9bd 100644
--- a/include/sys/fm/util.h
+++ b/include/sys/fm/util.h
@@ -104,6 +104,9 @@ extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t);
extern void zfs_zevent_init(zfs_zevent_t **);
extern void zfs_zevent_destroy(zfs_zevent_t *);
+extern void zfs_zevent_track_duplicate(void);
+extern void zfs_ereport_init(void);
+extern void zfs_ereport_fini(void);
#else
static inline void fm_init(void) { }
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index f6f633a95b7e..fe63d735babc 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1336,8 +1336,8 @@ typedef enum zfs_ioc {
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
- ZFS_IOC_SET_BOOTENV, /* 0x87 (Linux) */
- ZFS_IOC_GET_BOOTENV, /* 0x88 (Linux) */
+ ZFS_IOC_SET_BOOTENV, /* 0x87 */
+ ZFS_IOC_GET_BOOTENV, /* 0x88 */
ZFS_IOC_LAST
} zfs_ioc_t;
diff --git a/include/sys/spa.h b/include/sys/spa.h
index e53d0d64c302..ddce8cc914f8 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -1145,10 +1145,10 @@ extern const char *spa_state_to_name(spa_t *spa);
struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
- const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
- uint64_t length);
+ const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
zio_t *zio);
+extern void zfs_ereport_taskq_fini(void);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
index 797065fdd0a5..309ce33be067 100644
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@@ -94,7 +94,6 @@ extern void vdev_rele(vdev_t *);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
-extern void vdev_ashift_optimize(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
extern void vdev_deadman(vdev_t *vd, char *tag);
@@ -181,7 +180,7 @@ extern void vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv);
extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
offset, uint64_t size, zio_done_func_t *done, void *priv, int flags);
extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *);
-extern int vdev_label_write_bootenv(vdev_t *, char *);
+extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 90d607746013..3c4c3fb5a279 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -476,7 +476,16 @@ typedef struct vdev_phys {
} vdev_phys_t;
typedef enum vbe_vers {
- /* The bootenv file is stored as ascii text in the envblock */
+ /*
+ * The bootenv file is stored as ascii text in the envblock.
+ * It is used by the GRUB bootloader used on Linux to store the
+ * contents of the grubenv file. The file is stored as raw ASCII,
+ * and is protected by an embedded checksum. By default, GRUB will
+ * check if the boot filesystem supports storing the environment data
+ * in a special location, and if so, will invoke filesystem specific
+ * logic to retrieve it. This can be overridden by a variable, should
+ * the user so desire.
+ */
VB_RAW = 0,
/*
diff --git a/include/sys/zfs_bootenv.h b/include/sys/zfs_bootenv.h
new file mode 100644
index 000000000000..7af0a57dd008
--- /dev/null
+++ b/include/sys/zfs_bootenv.h
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _ZFS_BOOTENV_H
+#define _ZFS_BOOTENV_H
+
+/*
+ * Define macros for label bootenv nvlist pair keys.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BOOTENV_VERSION "version"
+
+#define BE_ILLUMOS_VENDOR "illumos"
+#define BE_FREEBSD_VENDOR "freebsd"
+#define BE_GRUB_VENDOR "grub"
+#define BE_LINUX_VENDOR "linux"
+
+#include <sys/zfs_bootenv_os.h>
+
+#define GRUB_ENVMAP BE_GRUB_VENDOR ":" "envmap"
+
+#define FREEBSD_BOOTONCE BE_FREEBSD_VENDOR ":" "bootonce"
+#define FREEBSD_BOOTONCE_USED BE_FREEBSD_VENDOR ":" "bootonce-used"
+#define FREEBSD_NVSTORE BE_FREEBSD_VENDOR ":" "nvstore"
+#define ILLUMOS_BOOTONCE BE_ILLUMOS_VENDOR ":" "bootonce"
+#define ILLUMOS_BOOTONCE_USED BE_ILLUMOS_VENDOR ":" "bootonce-used"
+#define ILLUMOS_NVSTORE BE_ILLUMOS_VENDOR ":" "nvstore"
+
+#define OS_BOOTONCE BOOTENV_OS ":" "bootonce"
+#define OS_BOOTONCE_USED BOOTENV_OS ":" "bootonce-used"
+#define OS_NVSTORE BOOTENV_OS ":" "nvstore"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_BOOTENV_H */
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 16df302c8f31..e33f52c176a8 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -325,11 +325,15 @@ extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);
#define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at)
+#define cv_timedwait_idle(cv, mp, at) cv_timedwait(cv, mp, at)
#define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at)
#define cv_wait_io(cv, mp) cv_wait(cv, mp)
+#define cv_wait_idle(cv, mp) cv_wait(cv, mp)
#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp)
#define cv_timedwait_sig_hires(cv, mp, t, r, f) \
cv_timedwait_hires(cv, mp, t, r, f)
+#define cv_timedwait_idle_hires(cv, mp, t, r, f) \
+ cv_timedwait_hires(cv, mp, t, r, f)
/*
* Thread-specific data
@@ -598,9 +602,9 @@ typedef struct vsecattr {
extern void delay(clock_t ticks);
#define SEC_TO_TICK(sec) ((sec) * hz)
-#define MSEC_TO_TICK(msec) ((msec) / (MILLISEC / hz))
-#define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz))
-#define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz))
+#define MSEC_TO_TICK(msec) (howmany((hrtime_t)(msec) * hz, MILLISEC))
+#define USEC_TO_TICK(usec) (howmany((hrtime_t)(usec) * hz, MICROSEC))
+#define NSEC_TO_TICK(nsec) (howmany((hrtime_t)(nsec) * hz, NANOSEC))
#define max_ncpus 64
#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index 136075a1fc18..53629cfc2c3f 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -67,7 +67,7 @@ extern "C" {
* Property values for acltype
*/
#define ZFS_ACLTYPE_OFF 0
-#define ZFS_ACLTYPE_POSIXACL 1
+#define ZFS_ACLTYPE_POSIX 1
/*
* Field manipulation macros for the drr_versioninfo field of the
diff --git a/include/sys/zio.h b/include/sys/zio.h
index f3b5a120793f..4959831716b5 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com>
@@ -680,7 +680,7 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
*/
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+extern int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
diff --git a/lib/Makefile.am b/lib/Makefile.am
index dda87e41c0aa..f049288a1ae7 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -6,9 +6,13 @@ if BUILD_LINUX
SUBDIRS += libefi
endif
+# libnvpair is installed as part of the final build product
+# libzutil depends on it, so it must be compiled before libzutil
+SUBDIRS += libnvpair
+
# libzutil depends on libefi if present
SUBDIRS += libzutil libunicode
# These five libraries, which are installed as the final build product,
# incorporate the eight convenience libraries given above.
-SUBDIRS += libuutil libnvpair libzfs_core libzfs libzpool
+SUBDIRS += libuutil libzfs_core libzfs libzpool libzfsbootenv
diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
index 2cb093f9601d..14bf57aa1cde 100644
--- a/lib/libefi/rdwr_efi.c
+++ b/lib/libefi/rdwr_efi.c
@@ -44,6 +44,7 @@
#include <sys/byteorder.h>
#include <sys/vdev_disk.h>
#include <linux/fs.h>
+#include <linux/blkpg.h>
static struct uuid_to_ptag {
struct uuid uuid;
@@ -209,19 +210,40 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
return (0);
}
+/*
+ * Return the device name associated with the open file descriptor `fd`,
+ * or NULL if it cannot be resolved. The string is allocated by
+ * realpath(3); the caller is responsible for freeing it.
+ */
+static char *
+efi_get_devname(int fd)
+{
+	/*
+	 * "/proc/self/fd/" (14 bytes) plus the decimal form of any int and
+	 * the terminating NUL fits easily, so no heap buffer is needed.
+	 */
+	char path[64];
+
+	/*
+	 * The libefi API only provides the open fd and not the file path.
+	 * To handle this realpath(3) is used to resolve the block device
+	 * name from /proc/self/fd/<fd>.
+	 */
+	(void) snprintf(path, sizeof (path), "/proc/self/fd/%d", fd);
+	return (realpath(path, NULL));
+}
+
+
static int
efi_get_info(int fd, struct dk_cinfo *dki_info)
{
- char *path;
char *dev_path;
int rval = 0;
memset(dki_info, 0, sizeof (*dki_info));
- path = calloc(1, PATH_MAX);
- if (path == NULL)
- goto error;
-
/*
* The simplest way to get the partition number under linux is
* to parse it out of the /dev/<disk><partition> block device name.
@@ -229,16 +251,10 @@ efi_get_info(int fd, struct dk_cinfo *dki_info)
* populates /dev/ so it may be trusted. The tricky bit here is
* that the naming convention is based on the block device type.
* So we need to take this in to account when parsing out the
- * partition information. Another issue is that the libefi API
- * API only provides the open fd and not the file path. To handle
- * this realpath(3) is used to resolve the block device name from
- * /proc/self/fd/<fd>. Aside from the partition number we collect
+ * partition information. Aside from the partition number we collect
* some additional device info.
*/
- (void) sprintf(path, "/proc/self/fd/%d", fd);
- dev_path = realpath(path, NULL);
- free(path);
-
+ dev_path = efi_get_devname(fd);
if (dev_path == NULL)
goto error;
@@ -1108,20 +1124,49 @@ check_input(struct dk_gpt *vtoc)
return (0);
}
+/*
+ * Issue a BLKPG partition ioctl against the device open at `fd`.
+ *
+ *	command	BLKPG operation, stored in the request's `op` field
+ *	start	copied to blkpg_partition.start
+ *	size	copied to blkpg_partition.length
+ *	pno	copied to blkpg_partition.pno and appended to the resolved
+ *		device name to form blkpg_partition.devname
+ *
+ * Returns VT_EINVAL when the device name cannot be resolved, otherwise
+ * whatever ioctl(2) returns.
+ */
+static int
+call_blkpg_ioctl(int fd, int command, diskaddr_t start,
+    diskaddr_t size, uint_t pno)
+{
+	struct blkpg_partition part;
+	struct blkpg_ioctl_arg req;
+	char *devname;
+
+	devname = efi_get_devname(fd);
+	if (devname == NULL) {
+		(void) fprintf(stderr, "failed to retrieve device name\n");
+		return (VT_EINVAL);
+	}
+
+	/* Describe the partition the operation applies to. */
+	memset(&part, 0, sizeof (part));
+	part.pno = pno;
+	part.start = start;
+	part.length = size;
+	snprintf(part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", devname, pno);
+	part.devname[BLKPG_DEVNAMELTH - 1] = '\0';
+	free(devname);
+
+	/* Wrap the partition description in the ioctl request. */
+	req.data = &part;
+	req.datalen = sizeof (part);
+	req.flags = 0;
+	req.op = command;
+
+	return (ioctl(fd, BLKPG, &req));
+}
+
+
/*
* add all the unallocated space to the current label
*/
int
efi_use_whole_disk(int fd)
{
- struct dk_gpt *efi_label = NULL;
- int rval;
- int i;
- uint_t resv_index = 0, data_index = 0;
- diskaddr_t resv_start = 0, data_start = 0;
- diskaddr_t data_size, limit, difference;
- boolean_t sync_needed = B_FALSE;
- uint_t nblocks;
+ struct dk_gpt *efi_label = NULL;
+ int rval;
+ int i;
+ uint_t resv_index = 0, data_index = 0;
+ diskaddr_t resv_start = 0, data_start = 0;
+ diskaddr_t data_size, limit, difference;
+ boolean_t sync_needed = B_FALSE;
+ uint_t nblocks;
rval = efi_alloc_and_read(fd, &efi_label);
if (rval < 0) {
@@ -1255,19 +1300,73 @@ efi_use_whole_disk(int fd)
efi_label->efi_parts[resv_index].p_start += difference;
efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks;
- rval = efi_write(fd, efi_label);
- if (rval < 0) {
- if (efi_debug) {
- (void) fprintf(stderr,
- "efi_use_whole_disk:fail to write label, rval=%d\n",
- rval);
+ /*
+ * Rescanning the partition table in the kernel can result
+ * in the device links being removed (see comment in vdev_disk_open).
+ * If BLKPG_RESIZE_PARTITION is available, then we can resize
+ * the partition table online and avoid having to remove the device
+ * links used by the pool. This provides a very deterministic
+ * approach to resizing devices and does not require any
+ * loops waiting for devices to reappear.
+ */
+#ifdef BLKPG_RESIZE_PARTITION
+ /*
+ * Delete the reserved partition since we're about to expand
+ * the data partition and it would overlap with the reserved
+ * partition.
+ * NOTE: The starting index for the ioctl is 1 while for the
+ * EFI partitions it's 0. For that reason we have to add one
+ * whenever we make an ioctl call.
+ */
+ rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1);
+ if (rval != 0)
+ goto out;
+
+ /*
+ * Expand the data partition
+ */
+ rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION,
+ efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize,
+ efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize,
+ data_index + 1);
+ if (rval != 0) {
+ (void) fprintf(stderr, "Unable to resize data "
+ "partition: %d\n", rval);
+ /*
+ * Since we failed to resize, we need to reset the start
+ * of the reserve partition and re-create it.
+ */
+ efi_label->efi_parts[resv_index].p_start -= difference;
+ }
+
+ /*
+ * Re-add the reserved partition. If we've expanded the data partition
+ * then we'll move the reserve partition to the end of the data
+ * partition. Otherwise, we'll recreate the partition in its original
+ * location. Note that we do this as best-effort and ignore any
+ * errors that may arise here. This will ensure that we finish writing
+ * the EFI label.
+ */
+ (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION,
+ efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize,
+ efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize,
+ resv_index + 1);
+#endif
+
+ /*
+ * We're now ready to write the EFI label.
+ */
+ if (rval == 0) {
+ rval = efi_write(fd, efi_label);
+ if (rval < 0 && efi_debug) {
+ (void) fprintf(stderr, "efi_use_whole_disk:fail "
+ "to write label, rval=%d\n", rval);
}
- efi_free(efi_label);
- return (rval);
}
+out:
efi_free(efi_label);
- return (0);
+ return (rval);
}
/*
diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
index bc949213f69c..a6a9b33d7655 100644
--- a/lib/libshare/os/linux/nfs.c
+++ b/lib/libshare/os/linux/nfs.c
@@ -31,6 +31,7 @@
#include <string.h>
#include <strings.h>
#include <errno.h>
+#include <fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/types.h>
diff --git a/lib/libspl/include/os/freebsd/sys/stat.h b/lib/libspl/include/os/freebsd/sys/stat.h
index 82c86262fff3..07f9762f09ee 100644
--- a/lib/libspl/include/os/freebsd/sys/stat.h
+++ b/lib/libspl/include/os/freebsd/sys/stat.h
@@ -28,6 +28,8 @@
#include_next <sys/stat.h>
+/* Note: this file can be used on linux/macOS when bootstrapping tools. */
+#if defined(__FreeBSD__)
#include <sys/mount.h> /* for BLKGETSIZE64 */
#define stat64 stat
@@ -68,4 +70,5 @@ fstat64_blk(int fd, struct stat64 *st)
return (0);
}
+#endif /* defined(__FreeBSD__) */
#endif /* _LIBSPL_SYS_STAT_H */
diff --git a/lib/libzfs/libzfs.pc.in b/lib/libzfs/libzfs.pc.in
index 6caf49d221f1..afe5635ae633 100644
--- a/lib/libzfs/libzfs.pc.in
+++ b/lib/libzfs/libzfs.pc.in
@@ -6,9 +6,9 @@ includedir=@includedir@
Name: libzfs
Description: LibZFS library
Version: @VERSION@
-URL: https://zfsonlinux.org
+URL: https://github.com/openzfs/zfs
Requires: libzfs_core
-Requires.private: libcrypto zlib
+Requires.private: @LIBCRYPTO_PC@ @ZLIB_PC@
Cflags: -I${includedir}/libzfs -I${includedir}/libspl
Libs: -L${libdir} -lzfs -lnvpair
Libs.private: -luutil -lm -pthread
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index 2c707f23f4b0..1eaed435c156 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -2893,11 +2893,12 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
case ZFS_PROP_GUID:
case ZFS_PROP_CREATETXG:
case ZFS_PROP_OBJSETID:
+ case ZFS_PROP_PBKDF2_ITERS:
/*
* These properties are stored as numbers, but they are
- * identifiers.
+ * identifiers or counters.
* We don't want them to be pretty printed, because pretty
- * printing mangles the ID into a truncated and useless value.
+ * printing truncates their values making them useless.
*/
if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
return (-1);
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 2501965e42ad..00b0b6faf2e2 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -4495,7 +4495,7 @@ zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
}
int
-zpool_set_bootenv(zpool_handle_t *zhp, const char *envmap)
+zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
{
int error = lzc_set_bootenv(zhp->zpool_name, envmap);
if (error != 0) {
@@ -4508,24 +4508,20 @@ zpool_set_bootenv(zpool_handle_t *zhp, const char *envmap)
}
int
-zpool_get_bootenv(zpool_handle_t *zhp, char *outbuf, size_t size, off_t offset)
+zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp)
{
- nvlist_t *nvl = NULL;
- int error = lzc_get_bootenv(zhp->zpool_name, &nvl);
+ nvlist_t *nvl;
+ int error;
+
+ nvl = NULL;
+ error = lzc_get_bootenv(zhp->zpool_name, &nvl);
if (error != 0) {
(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
dgettext(TEXT_DOMAIN,
"error getting bootenv in pool '%s'"), zhp->zpool_name);
- return (-1);
- }
- char *envmap = fnvlist_lookup_string(nvl, "envmap");
- if (offset >= strlen(envmap)) {
- fnvlist_free(nvl);
- return (0);
+ } else {
+ *nvlp = nvl;
}
- strncpy(outbuf, envmap + offset, size);
- int bytes = MIN(strlen(envmap + offset), size);
- fnvlist_free(nvl);
- return (bytes);
+ return (error);
}
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index b17b105ca34b..b46a4f122c47 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -2640,6 +2640,7 @@ recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
if (len > hdl->libzfs_max_nvlist) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "nvlist too large"));
+ free(buf);
return (ENOMEM);
}
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 301c8ddbab11..651bca2978ce 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -1060,6 +1060,9 @@ libzfs_init(void)
if ((error = zfs_nicestrtonum(hdl, env,
&hdl->libzfs_max_nvlist))) {
errno = error;
+ (void) close(hdl->libzfs_fd);
+ (void) fclose(hdl->libzfs_mnttab);
+ free(hdl);
return (NULL);
}
} else {
diff --git a/lib/libzfs/os/linux/libzfs_pool_os.c b/lib/libzfs/os/linux/libzfs_pool_os.c
index 5c6da5338dbc..e4f03aa43b4e 100644
--- a/lib/libzfs/os/linux/libzfs_pool_os.c
+++ b/lib/libzfs/os/linux/libzfs_pool_os.c
@@ -72,9 +72,6 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
* It's possible that we might encounter an error if the device
* does not have any unallocated space left. If so, we simply
* ignore that error and continue on.
- *
- * Also, we don't call efi_rescan() - that would just return EBUSY.
- * The module will do it for us in vdev_disk_open().
*/
error = efi_use_whole_disk(fd);
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index 50b1b9dec260..a3ba3b28427f 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -1625,13 +1625,9 @@ lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
* Set the bootenv contents for the given pool.
*/
int
-lzc_set_bootenv(const char *pool, const char *env)
+lzc_set_bootenv(const char *pool, const nvlist_t *env)
{
- nvlist_t *args = fnvlist_alloc();
- fnvlist_add_string(args, "envmap", env);
- int error = lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, args, NULL);
- fnvlist_free(args);
- return (error);
+ return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
}
/*
diff --git a/lib/libzfs_core/libzfs_core.pc.in b/lib/libzfs_core/libzfs_core.pc.in
index e14d42d11a5d..bc9582ea33d0 100644
--- a/lib/libzfs_core/libzfs_core.pc.in
+++ b/lib/libzfs_core/libzfs_core.pc.in
@@ -6,8 +6,8 @@ includedir=@includedir@
Name: libzfs_core
Description: LibZFS core library
Version: @VERSION@
-URL: https://zfsonlinux.org
-Requires.private: blkid uuid libtirpc zlib
+URL: https://github.com/openzfs/zfs
+Requires.private: @LIBBLKID_PC@ @LIBUUID_PC@ @LIBTIRPC_PC@ @ZLIB_PC@
Cflags: -I${includedir}/libzfs -I${includedir}/libspl
Libs: -L${libdir} -lzfs_core -lnvpair
Libs.private: @LIBCLOCK_GETTIME@ @LIBUDEV_LIBS@ -lm -pthread
diff --git a/lib/libzfsbootenv/.gitignore b/lib/libzfsbootenv/.gitignore
new file mode 100644
index 000000000000..3fea5c642d02
--- /dev/null
+++ b/lib/libzfsbootenv/.gitignore
@@ -0,0 +1 @@
+/libzfsbootenv.pc
diff --git a/lib/libzfsbootenv/Makefile.am b/lib/libzfsbootenv/Makefile.am
new file mode 100644
index 000000000000..6b9a8f0137a2
--- /dev/null
+++ b/lib/libzfsbootenv/Makefile.am
@@ -0,0 +1,32 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgconfig_DATA = libzfsbootenv.pc
+
+lib_LTLIBRARIES = libzfsbootenv.la
+
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+endif
+if BUILD_LINUX
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/linux/zfs
+endif
+
+USER_C = \
+ lzbe_device.c \
+ lzbe_pair.c \
+ lzbe_util.c
+
+dist_libzfsbootenv_la_SOURCES = \
+ $(USER_C)
+
+libzfsbootenv_la_LIBADD = \
+ $(abs_top_builddir)/lib/libzfs/libzfs.la \
+ $(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+libzfsbootenv_la_LDFLAGS =
+
+if !ASAN_ENABLED
+libzfsbootenv_la_LDFLAGS += -Wl,-z,defs
+endif
+
+libzfsbootenv_la_LDFLAGS += -version-info 1:0:0
diff --git a/lib/libzfsbootenv/libzfsbootenv.pc.in b/lib/libzfsbootenv/libzfsbootenv.pc.in
new file mode 100644
index 000000000000..61bafa66e3fd
--- /dev/null
+++ b/lib/libzfsbootenv/libzfsbootenv.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libzfsbootenv
+Description: LibZFSBootENV library
+Version: @VERSION@
+URL: https://zfsonlinux.org
+Requires: libzfs libnvpair
+Cflags: -I${includedir}
+Libs: -L${libdir} -lzfsbootenv
diff --git a/lib/libzfsbootenv/lzbe_device.c b/lib/libzfsbootenv/lzbe_device.c
new file mode 100644
index 000000000000..670efd8b0603
--- /dev/null
+++ b/lib/libzfsbootenv/lzbe_device.c
@@ -0,0 +1,164 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <libzfs.h>
+#include <libzfsbootenv.h>
+#include <sys/zfs_bootenv.h>
+#include <sys/vdev_impl.h>
+
+/*
+ * Store the boot device name in the zpool label bootenv area.
+ *
+ * This call sets the bootenv version to VB_NVLIST; if the bootenv
+ * currently holds a different version, the old data is replaced.
+ *
+ *	pool	name of the pool; must be non-NULL and non-empty
+ *	flag	lzbe_add keeps an existing VB_NVLIST bootenv and updates it,
+ *		lzbe_replace always starts from an empty nvlist
+ *	device	boot device; NULL or "" removes the OS_BOOTONCE entry.
+ *		A name not starting with "zfs:" is stored as "zfs:<device>:".
+ *
+ * Returns 0 on success, -1 on invalid arguments or when the library or
+ * pool cannot be opened, ENOMEM if the descriptor string cannot be
+ * built, otherwise the error from zpool_set_bootenv().
+ */
+int
+lzbe_set_boot_device(const char *pool, lzbe_flags_t flag, const char *device)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *nv;
+	char *descriptor;
+	uint64_t version;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0')
+		return (rv);
+
+	if ((hdl = libzfs_init()) == NULL)
+		return (rv);
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	switch (flag) {
+	case lzbe_add:
+		rv = zpool_get_bootenv(zphdl, &nv);
+		if (rv == 0) {
+			/*
+			 * We got the nvlist, check for version.
+			 * if version is missing or is not VB_NVLIST,
+			 * create new list.
+			 */
+			rv = nvlist_lookup_uint64(nv, BOOTENV_VERSION,
+			    &version);
+			if (rv == 0 && version == VB_NVLIST)
+				break;
+
+			/* Drop this nvlist */
+			fnvlist_free(nv);
+		}
+		/* FALLTHROUGH */
+	case lzbe_replace:
+		nv = fnvlist_alloc();
+		break;
+	default:
+		/* Unknown flag: release both handles before bailing out. */
+		zpool_close(zphdl);
+		libzfs_fini(hdl);
+		return (-1);
+	}
+
+	/*
+	 * rv may still carry a lookup error from the lzbe_add path that was
+	 * already handled by falling back to a fresh list; start clean.
+	 */
+	rv = 0;
+
+	/* version is mandatory */
+	fnvlist_add_uint64(nv, BOOTENV_VERSION, VB_NVLIST);
+
+	/*
+	 * If device name is empty, remove boot device configuration.
+	 */
+	if ((device == NULL || *device == '\0')) {
+		if (nvlist_exists(nv, OS_BOOTONCE))
+			fnvlist_remove(nv, OS_BOOTONCE);
+	} else {
+		/*
+		 * Use device name directly if it does start with
+		 * prefix "zfs:". Otherwise, add prefix and suffix.
+		 */
+		if (strncmp(device, "zfs:", 4) == 0) {
+			fnvlist_add_string(nv, OS_BOOTONCE, device);
+		} else {
+			descriptor = NULL;
+			if (asprintf(&descriptor, "zfs:%s:", device) > 0) {
+				fnvlist_add_string(nv, OS_BOOTONCE,
+				    descriptor);
+				free(descriptor);
+			} else {
+				rv = ENOMEM;
+			}
+		}
+	}
+
+	/*
+	 * Only write the bootenv when the list was built completely, so an
+	 * allocation failure is reported instead of being overwritten.
+	 */
+	if (rv == 0) {
+		rv = zpool_set_bootenv(zphdl, nv);
+		if (rv != 0) {
+			fprintf(stderr, "%s\n",
+			    libzfs_error_description(hdl));
+		}
+	}
+
+	fnvlist_free(nv);
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
+
+/*
+ * Return boot device name from bootenv, if set.
+ *
+ *	pool	name of the pool; must be non-NULL and non-empty
+ *	device	out parameter; on success receives a strdup()ed dataset
+ *		name (the "zfs:" prefix and a trailing ':' stripped) that
+ *		the caller must free. It is not written on failure.
+ *
+ * Returns 0 on success, -1 on invalid arguments or when the library or
+ * pool cannot be opened, EINVAL if the stored value lacks the "zfs:"
+ * prefix, ENOMEM if the copy cannot be allocated, otherwise the error
+ * from zpool_get_bootenv() or nvlist_lookup_string().
+ */
+int
+lzbe_get_boot_device(const char *pool, char **device)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *nv;
+	char *val;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0' || device == NULL)
+		return (rv);
+
+	if ((hdl = libzfs_init()) == NULL)
+		return (rv);
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	rv = zpool_get_bootenv(zphdl, &nv);
+	if (rv == 0) {
+		rv = nvlist_lookup_string(nv, OS_BOOTONCE, &val);
+		if (rv == 0) {
+			/*
+			 * zfs device descriptor is in form of "zfs:dataset:",
+			 * we only do need dataset name.
+			 */
+			if (strncmp(val, "zfs:", 4) == 0) {
+				/* Copy before nv (which owns val) is freed. */
+				char *name = strdup(val + 4);
+
+				if (name != NULL) {
+					size_t len = strlen(name);
+
+					/*
+					 * A bare "zfs:" leaves an empty
+					 * string; do not index name[-1].
+					 */
+					if (len > 0 && name[len - 1] == ':')
+						name[len - 1] = '\0';
+					*device = name;
+				} else {
+					rv = ENOMEM;
+				}
+			} else {
+				rv = EINVAL;
+			}
+		}
+		nvlist_free(nv);
+	}
+
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
diff --git a/lib/libzfsbootenv/lzbe_pair.c b/lib/libzfsbootenv/lzbe_pair.c
new file mode 100644
index 000000000000..831355ba4b7c
--- /dev/null
+++ b/lib/libzfsbootenv/lzbe_pair.c
@@ -0,0 +1,347 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <libzfs.h>
+#include <libzfsbootenv.h>
+#include <sys/zfs_bootenv.h>
+#include <sys/vdev_impl.h>
+
+/*
+ * Get or create nvlist. If key is not NULL, get nvlist from bootenv,
+ * otherwise return bootenv.
+ *
+ * pool - name of the pool to query; must be a non-empty string.
+ * key  - name of a nested nvlist inside the bootenv, or NULL for the
+ *        whole bootenv.
+ * ptr  - out parameter receiving the nvlist_t pointer.
+ *
+ * With a key, the nested list is duplicated so the result is independent
+ * of the bootenv list; when the key is not present, a new empty list
+ * (NV_UNIQUE_NAME) is allocated instead.
+ *
+ * Returns 0 on success, -1 for invalid arguments or when the library or
+ * pool handle can not be opened, otherwise the error from
+ * zpool_get_bootenv(), nvlist_dup() or nvlist_alloc().  When one of the
+ * latter two fails, *ptr is set to NULL.
+ */
+int
+lzbe_nvlist_get(const char *pool, const char *key, void **ptr)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *nv;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0' || ptr == NULL)
+		return (rv);
+
+	if ((hdl = libzfs_init()) == NULL) {
+		return (rv);
+	}
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	rv = zpool_get_bootenv(zphdl, &nv);
+	if (rv == 0) {
+		nvlist_t *nvl, *dup;
+
+		if (key != NULL) {
+			rv = nvlist_lookup_nvlist(nv, key, &nvl);
+			if (rv == 0) {
+				/* nvl lives inside nv; copy it before freeing. */
+				rv = nvlist_dup(nvl, &dup, 0);
+				nvlist_free(nv);
+				if (rv == 0)
+					nv = dup;
+				else
+					nv = NULL;
+			} else {
+				nvlist_free(nv);
+				/*
+				 * Clear the stale pointer first, so a failed
+				 * allocation can not hand the freed list
+				 * back to the caller.
+				 */
+				nv = NULL;
+				rv = nvlist_alloc(&nv, NV_UNIQUE_NAME, 0);
+			}
+		}
+		*ptr = nv;
+	}
+
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
+
+/*
+ * Store an nvlist into the pool's bootenv.
+ *
+ * With a NULL key, ptr is written as the whole bootenv.  Otherwise the
+ * current bootenv is read, replaced by a fresh VB_NVLIST-versioned list
+ * if its version is missing or different, and ptr is added under key
+ * before the result is written back.
+ *
+ * Returns -1 for a missing pool name or when the library or pool handle
+ * can not be opened, otherwise the status of the last bootenv/nvlist
+ * operation performed.
+ */
+int
+lzbe_nvlist_set(const char *pool, const char *key, void *ptr)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *bootenv;
+	uint64_t vers;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0')
+		return (rv);
+
+	hdl = libzfs_init();
+	if (hdl == NULL)
+		return (rv);
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	if (key == NULL) {
+		/* No key: the caller's list is the complete bootenv. */
+		rv = zpool_set_bootenv(zphdl, ptr);
+	} else if ((rv = zpool_get_bootenv(zphdl, &bootenv)) == 0) {
+		/*
+		 * We have the current bootenv.  Start over with an empty
+		 * versioned list unless it already is a VB_NVLIST one.
+		 */
+		if (nvlist_lookup_uint64(bootenv, BOOTENV_VERSION,
+		    &vers) != 0 || vers != VB_NVLIST) {
+			fnvlist_free(bootenv);
+			bootenv = fnvlist_alloc();
+			fnvlist_add_uint64(bootenv, BOOTENV_VERSION,
+			    VB_NVLIST);
+		}
+
+		rv = nvlist_add_nvlist(bootenv, key, ptr);
+		if (rv == 0)
+			rv = zpool_set_bootenv(zphdl, bootenv);
+		nvlist_free(bootenv);
+	}
+
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
+
+/*
+ * Release an nvlist that was handed out by lzbe_nvlist_get().
+ */
+void
+lzbe_nvlist_free(void *ptr)
+{
+	nvlist_t *nvl = ptr;
+
+	nvlist_free(nvl);
+}
+
+/*
+ * Textual names of nvpair data types.  nvpair_type_from_name() returns
+ * the array index of the matching name and lzbe_add_pair() uses that
+ * index as a data_type_t value, so the order of the entries is
+ * significant and must not be changed.
+ * NOTE(review): presumably mirrors the order of the data_type_t enum --
+ * confirm against the nvpair header when adding entries.
+ */
+static const char *typenames[] = {
+ "DATA_TYPE_UNKNOWN",
+ "DATA_TYPE_BOOLEAN",
+ "DATA_TYPE_BYTE",
+ "DATA_TYPE_INT16",
+ "DATA_TYPE_UINT16",
+ "DATA_TYPE_INT32",
+ "DATA_TYPE_UINT32",
+ "DATA_TYPE_INT64",
+ "DATA_TYPE_UINT64",
+ "DATA_TYPE_STRING",
+ "DATA_TYPE_BYTE_ARRAY",
+ "DATA_TYPE_INT16_ARRAY",
+ "DATA_TYPE_UINT16_ARRAY",
+ "DATA_TYPE_INT32_ARRAY",
+ "DATA_TYPE_UINT32_ARRAY",
+ "DATA_TYPE_INT64_ARRAY",
+ "DATA_TYPE_UINT64_ARRAY",
+ "DATA_TYPE_STRING_ARRAY",
+ "DATA_TYPE_HRTIME",
+ "DATA_TYPE_NVLIST",
+ "DATA_TYPE_NVLIST_ARRAY",
+ "DATA_TYPE_BOOLEAN_VALUE",
+ "DATA_TYPE_INT8",
+ "DATA_TYPE_UINT8",
+ "DATA_TYPE_BOOLEAN_ARRAY",
+ "DATA_TYPE_INT8_ARRAY",
+ "DATA_TYPE_UINT8_ARRAY"
+};
+
+/*
+ * Translate a type name into its position in typenames[].
+ * Returns 0 (the slot of "DATA_TYPE_UNKNOWN") when the name is not listed.
+ */
+static int
+nvpair_type_from_name(const char *name)
+{
+	unsigned idx = 0;
+
+	while (idx < ARRAY_SIZE(typenames)) {
+		if (strcmp(typenames[idx], name) == 0)
+			return (idx);
+		idx++;
+	}
+
+	/* No match. */
+	return (0);
+}
+
+/*
+ * Add pair defined by key, type and value into nvlist.
+ *
+ * ptr   - the nvlist (nvlist_t *) to add the pair to.
+ * key   - name of the pair.
+ * type  - type name as listed in typenames[] ("DATA_TYPE_..."); NULL
+ *         selects "DATA_TYPE_STRING".
+ * value - pointer to the data: a scalar of the named type, a string,
+ *         an nvlist, or an array.
+ * size  - for scalar types must equal sizeof of that type; for array
+ *         types it is passed unchanged as the last argument of the
+ *         matching nvlist_add_*_array() call (presumably the element
+ *         count -- confirm against libnvpair).  Not used for
+ *         DATA_TYPE_STRING and DATA_TYPE_NVLIST.
+ *
+ * Returns 0 without adding anything when ptr, key or value is NULL,
+ * EINVAL for an unrecognized type name or a scalar size mismatch,
+ * ENOTSUP for a recognized type that has no case below, otherwise the
+ * return value of the nvlist_add_*() call.
+ */
+int
+lzbe_add_pair(void *ptr, const char *key, const char *type, void *value,
+ size_t size)
+{
+ nvlist_t *nv = ptr;
+ data_type_t dt;
+ int rv = 0;
+
+ /* Missing arguments are not reported as an error; rv is still 0. */
+ if (ptr == NULL || key == NULL || value == NULL)
+ return (rv);
+
+ if (type == NULL)
+ type = "DATA_TYPE_STRING";
+ /* The index into typenames[] is used directly as data_type_t. */
+ dt = nvpair_type_from_name(type);
+ if (dt == DATA_TYPE_UNKNOWN)
+ return (EINVAL);
+
+ /*
+ * Scalar cases verify size against the type before dereferencing
+ * value; string, nvlist and array cases hand value through as is.
+ */
+ switch (dt) {
+ case DATA_TYPE_BYTE:
+ if (size != sizeof (uint8_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_byte(nv, key, *(uint8_t *)value);
+ break;
+
+ case DATA_TYPE_INT16:
+ if (size != sizeof (int16_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_int16(nv, key, *(int16_t *)value);
+ break;
+
+ case DATA_TYPE_UINT16:
+ if (size != sizeof (uint16_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_uint16(nv, key, *(uint16_t *)value);
+ break;
+
+ case DATA_TYPE_INT32:
+ if (size != sizeof (int32_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_int32(nv, key, *(int32_t *)value);
+ break;
+
+ case DATA_TYPE_UINT32:
+ if (size != sizeof (uint32_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_uint32(nv, key, *(uint32_t *)value);
+ break;
+
+ case DATA_TYPE_INT64:
+ if (size != sizeof (int64_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_int64(nv, key, *(int64_t *)value);
+ break;
+
+ case DATA_TYPE_UINT64:
+ if (size != sizeof (uint64_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_uint64(nv, key, *(uint64_t *)value);
+ break;
+
+ case DATA_TYPE_STRING:
+ /* size is ignored for strings. */
+ rv = nvlist_add_string(nv, key, value);
+ break;
+
+ case DATA_TYPE_BYTE_ARRAY:
+ rv = nvlist_add_byte_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_INT16_ARRAY:
+ rv = nvlist_add_int16_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_UINT16_ARRAY:
+ rv = nvlist_add_uint16_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_INT32_ARRAY:
+ rv = nvlist_add_int32_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_UINT32_ARRAY:
+ rv = nvlist_add_uint32_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_INT64_ARRAY:
+ rv = nvlist_add_int64_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_UINT64_ARRAY:
+ rv = nvlist_add_uint64_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_STRING_ARRAY:
+ rv = nvlist_add_string_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_NVLIST:
+ /* value itself is the nvlist to embed; size is ignored. */
+ rv = nvlist_add_nvlist(nv, key, (nvlist_t *)value);
+ break;
+
+ case DATA_TYPE_NVLIST_ARRAY:
+ rv = nvlist_add_nvlist_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_BOOLEAN_VALUE:
+ if (size != sizeof (boolean_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_boolean_value(nv, key, *(boolean_t *)value);
+ break;
+
+ case DATA_TYPE_INT8:
+ if (size != sizeof (int8_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_int8(nv, key, *(int8_t *)value);
+ break;
+
+ case DATA_TYPE_UINT8:
+ if (size != sizeof (uint8_t)) {
+ rv = EINVAL;
+ break;
+ }
+ rv = nvlist_add_uint8(nv, key, *(uint8_t *)value);
+ break;
+
+ case DATA_TYPE_BOOLEAN_ARRAY:
+ rv = nvlist_add_boolean_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_INT8_ARRAY:
+ rv = nvlist_add_int8_array(nv, key, value, size);
+ break;
+
+ case DATA_TYPE_UINT8_ARRAY:
+ rv = nvlist_add_uint8_array(nv, key, value, size);
+ break;
+
+ /* Listed in typenames[] but without a case here, e.g. HRTIME. */
+ default:
+ return (ENOTSUP);
+ }
+
+ return (rv);
+}
+
+/*
+ * Remove all pairs named key from the nvlist given in ptr.
+ * Returns whatever nvlist_remove_all() reports.
+ */
+int
+lzbe_remove_pair(void *ptr, const char *key)
+{
+	nvlist_t *nvl = ptr;
+
+	return (nvlist_remove_all(nvl, key));
+}
diff --git a/lib/libzfsbootenv/lzbe_util.c b/lib/libzfsbootenv/lzbe_util.c
new file mode 100644
index 000000000000..35e98549582e
--- /dev/null
+++ b/lib/libzfsbootenv/lzbe_util.c
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <libzfs.h>
+#include <libzfsbootenv.h>
+
+/*
+ * Print bootenv information for a pool to the stream "of".
+ *
+ * "nvlist" is passed to lzbe_nvlist_get() as the key: NULL prints the
+ * whole bootenv, otherwise only the nvlist stored under that name.
+ * Returns -1 for invalid arguments, otherwise the status of
+ * lzbe_nvlist_get().
+ */
+int
+lzbe_bootenv_print(const char *pool, const char *nvlist, FILE *of)
+{
+	nvlist_t *bootenv;
+	int err;
+
+	if (pool == NULL || *pool == '\0' || of == NULL)
+		return (-1);
+
+	err = lzbe_nvlist_get(pool, nvlist, (void **)&bootenv);
+	if (err != 0)
+		return (err);
+
+	nvlist_print(of, bootenv);
+	nvlist_free(bootenv);
+
+	return (0);
+}
diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am
index 9c1b81bf5373..992c21cc1560 100644
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@@ -7,6 +7,13 @@ VPATH = \
$(top_srcdir)/module/os/linux/zfs \
$(top_srcdir)/lib/libzpool
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+endif
+if BUILD_LINUX
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/linux/zfs
+endif
+
# Unconditionally enable debugging for libzpool
AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
diff --git a/lib/libzutil/Makefile.am b/lib/libzutil/Makefile.am
index cd03ba19d508..1b55ef68074a 100644
--- a/lib/libzutil/Makefile.am
+++ b/lib/libzutil/Makefile.am
@@ -40,6 +40,7 @@ libzutil_la_SOURCES = $(USER_C)
libzutil_la_LIBADD = \
$(abs_top_builddir)/lib/libavl/libavl.la \
$(abs_top_builddir)/lib/libtpool/libtpool.la \
+ $(abs_top_builddir)/lib/libnvpair/libnvpair.la \
$(abs_top_builddir)/lib/libspl/libspl.la
if BUILD_LINUX
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 893bbf65220d..a266d9a7e96c 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1,6 +1,6 @@
'\" te
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
-.\" Copyright (c) 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2019, 2020 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc.
.\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except
@@ -201,6 +201,22 @@ Default value: \fB200\fR%.
.sp
.ne 2
.na
+\fBl2arc_mfuonly\fR (int)
+.ad
+.RS 12n
+Controls whether only MFU metadata and data are cached from ARC into L2ARC.
+This may be desired to avoid wasting space on L2ARC when reading/writing large
+amounts of data that are not expected to be accessed more than once. The
+default is \fB0\fR, meaning both MRU and MFU data and metadata are cached.
+When turning off (\fB0\fR) this feature, some MRU buffers will still be present
+in ARC and eventually cached on L2ARC.
+.sp
+Use \fB0\fR for no (default) and \fB1\fR for yes.
+.RE
+
+.sp
+.ne 2
+.na
\fBl2arc_meta_percent\fR (int)
.ad
.RS 12n
@@ -744,6 +760,28 @@ Default value: \fB32,768\fR.
.sp
.ne 2
.na
+\fBvdev_file_logical_ashift\fR (ulong)
+.ad
+.RS 12n
+Logical ashift for file-based devices.
+.sp
+Default value: \fB9\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBvdev_file_physical_ashift\fR (ulong)
+.ad
+.RS 12n
+Physical ashift for file-based devices.
+.sp
+Default value: \fB9\fR.
+.RE
+
+.sp
+.ne 2
+.na
\fBzap_iterate_prefetch\fR (int)
.ad
.RS 12n
@@ -3657,6 +3695,27 @@ Default value: \fB0\fR.
.sp
.ne 2
.na
+\fBzfs_zevent_retain_max\fR (int)
+.ad
+.RS 12n
+Maximum recent zevent records to retain for duplicate checking. Setting
+this value to zero disables duplicate detection.
+.sp
+Default value: \fB2000\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBzfs_zevent_retain_expire_secs\fR (int)
+.ad
+.RS 12n
+Lifespan for a recent ereport that was retained for duplicate checking.
+.sp
+Default value: \fB900\fR.
+.RE
+
+.na
\fBzfs_zil_clean_taskq_maxalloc\fR (int)
.ad
.RS 12n
diff --git a/man/man8/zfs-rename.8 b/man/man8/zfs-rename.8
index d8d9f49d7689..78bd8934a00f 100644
--- a/man/man8/zfs-rename.8
+++ b/man/man8/zfs-rename.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd September 1, 2020
.Dt ZFS-RENAME 8
.Os
.Sh NAME
@@ -52,8 +52,11 @@
.Cm rename
.Fl u
.Op Fl f
-.Ar filesystem
-.Ar filesystem
+.Ar filesystem Ar filesystem
+.Nm
+.Cm rename
+.Fl r
+.Ar snapshot Ar snapshot
.Sh DESCRIPTION
.Bl -tag -width ""
.It Xo
diff --git a/man/man8/zfsprops.8 b/man/man8/zfsprops.8
index a4e2829e3fad..2c4a2af29480 100644
--- a/man/man8/zfsprops.8
+++ b/man/man8/zfsprops.8
@@ -38,7 +38,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
.\"
-.Dd January 30, 2020
+.Dd September 1, 2020
.Dt ZFSPROPS 8
.Os
.Sh NAME
@@ -651,7 +651,7 @@ you must first remove all
.Tn ACL
entries which do not represent the current mode.
.El
-.It Sy acltype Ns = Ns Sy off Ns | Ns Sy noacl Ns | Ns Sy posixacl
+.It Sy acltype Ns = Ns Sy off Ns | Ns Sy posix
Controls whether ACLs are enabled and if so what type of ACL to use.
This property is not visible on FreeBSD yet.
.Bl -tag -width "posixacl"
@@ -662,15 +662,18 @@ property set to off then ACLs are disabled.
.It Sy noacl
an alias for
.Sy off
-.It Sy posixacl
+.It Sy posix
indicates POSIX ACLs should be used. POSIX ACLs are specific to Linux and are
not functional on other platforms. POSIX ACLs are stored as an extended
attribute and therefore will not overwrite any existing NFSv4 ACLs which
may be set.
+.It Sy posixacl
+an alias for
+.Sy posix
.El
.Pp
To obtain the best performance when setting
-.Sy posixacl
+.Sy posix
users are strongly encouraged to set the
.Sy xattr=sa
property. This will result in the POSIX ACL being stored more efficiently on
@@ -1049,8 +1052,9 @@ In order to provide consistent data protection, encryption must be specified at
dataset creation time and it cannot be changed afterwards.
.Pp
For more details and caveats about encryption see the
-.Sy Encryption
-section.
+.Em Encryption
+section of
+.Xr zfs-load-key 8 .
.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase
Controls what format the user's encryption key will be provided as. This
property is only set when the dataset is encrypted.
diff --git a/man/man8/zgenhostid.8 b/man/man8/zgenhostid.8
index ff3d2d960b12..ff198443dd2a 100644
--- a/man/man8/zgenhostid.8
+++ b/man/man8/zgenhostid.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
.\"
-.Dd September 16, 2017
+.Dd September 13, 2020
.Dt ZGENHOSTID 8 SMM
.Os
.Sh NAME
@@ -30,42 +30,73 @@
.Em /etc/hostid
.Sh SYNOPSIS
.Nm
+.Op Fl f
+.Op Fl o Ar filename
.Op Ar hostid
.Sh DESCRIPTION
-If
-.Em /etc/hostid
-does not exist, create it and store a hostid in it. If the user provides
-.Op Ar hostid
-on the command line, store that value. Otherwise, randomly generate a
-value to store.
-.Pp
-This emulates the
-.Xr genhostid 1
-utility and is provided for use on systems which do not include the utility.
-.Sh OPTIONS
+Creates
+.Pa /etc/hostid
+file and stores hostid in it.
+If the user provides
.Op Ar hostid
+on the command line, validates and stores that value.
+Otherwise, randomly generates a value to store.
+.Bl -tag -width "hostid"
+.It Fl h
+Display a summary of the command-line options.
+.It Fl f
+Force file overwrite.
+.It Fl o Ar filename
+Write to
+.Pa filename
+instead of default
+.Pa /etc/hostid .
+.It Ar hostid
Specifies the value to be placed in
-.Em /etc/hostid .
-It must be a number with a value between 1 and 2^32-1. This value
+.Pa /etc/hostid .
+It must be a number with a value between 1 and 2^32-1.
+This value
.Sy must
-be unique among your systems. It must be expressed in hexadecimal and be
-exactly 8 digits long.
+be unique among your systems.
+It
+.Sy must
+be expressed in hexadecimal and be exactly
+.Em 8
+digits long, optionally prefixed by
+.Em 0x .
+.El
+.Sh FILES
+.Pa /etc/hostid
.Sh EXAMPLES
-.Bl -tag -width Ds
+.Bl -tag -width Bd
.It Generate a random hostid and store it
.Bd -literal
# zgenhostid
.Ed
-.It Record the libc-generated hostid in Em /etc/hostid
+.It Record the libc-generated hostid in Pa /etc/hostid
.Bd -literal
-# zgenhostid $(hostid)
+# zgenhostid "$(hostid)"
.Ed
-.It Record a custom hostid (0xdeadbeef) in Em etc/hostid
+.It Record a custom hostid (0xdeadbeef) in Pa /etc/hostid
.Bd -literal
# zgenhostid deadbeef
.Ed
+.It Record a custom hostid (0x01234567) in Pa /tmp/hostid
+and overwrite the file if it exists
+.Bd -literal
+# zgenhostid -f -o /tmp/hostid 0x01234567
+.Ed
.El
.Sh SEE ALSO
.Xr genhostid 1 ,
.Xr hostid 1 ,
+.Xr sethostid 3 ,
.Xr spl-module-parameters 5
+.Sh HISTORY
+.Nm
+emulates the
+.Xr genhostid 1
+utility and is provided for use on systems which
+do not include the utility or do not provide
+.Xr sethostid 3
+call.
diff --git a/man/man8/zpoolprops.8 b/man/man8/zpoolprops.8
index 5adbfb321115..3437e48864ce 100644
--- a/man/man8/zpoolprops.8
+++ b/man/man8/zpoolprops.8
@@ -77,7 +77,7 @@ The zpool
.Sy free
property is not generally useful for this purpose, and can be substantially more than the zfs
.Sy available
-space. This discrepancy is due to several factors, including raidz party; zfs
+space. This discrepancy is due to several factors, including raidz parity; zfs
reservation, quota, refreservation, and refquota properties; and space set aside by
.Sy spa_slop_shift
(see
diff --git a/module/os/freebsd/zfs/kmod_core.c b/module/os/freebsd/zfs/kmod_core.c
index dce73577eacd..4c696129857a 100644
--- a/module/os/freebsd/zfs/kmod_core.c
+++ b/module/os/freebsd/zfs/kmod_core.c
@@ -378,4 +378,3 @@ MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
#endif
MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1);
-MODULE_DEPEND(zfsctrl, cryptodev, 1, 1, 1);
diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c
index 200bbf43d757..b3cb7e7e4374 100644
--- a/module/os/freebsd/zfs/sysctl_os.c
+++ b/module/os/freebsd/zfs/sysctl_os.c
@@ -121,6 +121,7 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0,
"ZFS livelist condense");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
+SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
"ZFS VDEV mirror");
diff --git a/module/os/freebsd/zfs/vdev_file.c b/module/os/freebsd/zfs/vdev_file.c
index 4d27751c8893..cf762c5fd61c 100644
--- a/module/os/freebsd/zfs/vdev_file.c
+++ b/module/os/freebsd/zfs/vdev_file.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -40,6 +40,9 @@
static taskq_t *vdev_file_taskq;
+unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+
void
vdev_file_init(void)
{
@@ -167,8 +170,8 @@ skip_open:
}
*max_psize = *psize = zfa.zfa_size;
- *logical_ashift = SPA_MINBLOCKSHIFT;
- *physical_ashift = SPA_MINBLOCKSHIFT;
+ *logical_ashift = vdev_file_logical_ashift;
+ *physical_ashift = vdev_file_physical_ashift;
return (0);
}
@@ -326,3 +329,8 @@ vdev_ops_t vdev_disk_ops = {
};
#endif
+
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+ "Logical ashift for file-based devices");
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+ "Physical ashift for file-based devices");
diff --git a/module/os/freebsd/zfs/zfs_file_os.c b/module/os/freebsd/zfs/zfs_file_os.c
index ec7c04717c84..d7786d5136a2 100644
--- a/module/os/freebsd/zfs/zfs_file_os.c
+++ b/module/os/freebsd/zfs/zfs_file_os.c
@@ -234,13 +234,10 @@ drop:
int
zfs_file_fsync(zfs_file_t *fp, int flags)
{
- struct vnode *v;
-
if (fp->f_type != DTYPE_VNODE)
return (EINVAL);
- v = fp->f_data;
- return (zfs_vop_fsync(v));
+ return (zfs_vop_fsync(fp->f_vnode));
}
int
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index b6cf0c92b70b..77812ca8d400 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -975,7 +975,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
#else
rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
#endif
- rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
+ ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
@@ -1126,7 +1126,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
ASSERT(zfsvfs->z_nr_znodes == 0);
list_destroy(&zfsvfs->z_all_znodes);
rrm_destroy(&zfsvfs->z_teardown_lock);
- rw_destroy(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs);
rw_destroy(&zfsvfs->z_fuid_lock);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zfsvfs->z_hold_mtx[i]);
@@ -1545,7 +1545,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
zfsvfs->z_log = NULL;
}
- rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
+ ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs);
/*
* If we are not unmounting (ie: online recv) and someone already
@@ -1553,7 +1553,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
* or a reopen of z_os failed then just bail out now.
*/
if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
return (SET_ERROR(EIO));
}
@@ -1581,7 +1581,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
*/
if (unmounting) {
zfsvfs->z_unmounted = B_TRUE;
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
}
@@ -1901,7 +1901,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
znode_t *zp;
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
- ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
+ ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
/*
* We already own this, so just update the objset_t, as the one we
@@ -1939,7 +1939,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
bail:
/* release the VOPs */
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) {
@@ -2056,7 +2056,7 @@ int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
- ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
+ ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
/*
* We already own this, so just hold and rele it to update the
@@ -2072,7 +2072,7 @@ zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
zfsvfs->z_os = os;
/* release the VOPs */
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
/*
diff --git a/module/os/freebsd/zfs/zfs_vnops.c b/module/os/freebsd/zfs/zfs_vnops.c
index 2a4acf21582f..2dde5b1f9d5c 100644
--- a/module/os/freebsd/zfs/zfs_vnops.c
+++ b/module/os/freebsd/zfs/zfs_vnops.c
@@ -4638,13 +4638,13 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
int error;
- rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+ ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs);
if (zp->z_sa_hdl == NULL) {
/*
* The fs has been unmounted, or we did a
* suspend/resume and this file no longer exists.
*/
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vrecycle(vp);
return;
}
@@ -4653,7 +4653,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
/*
* Fast path to recycle a vnode of a removed file.
*/
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vrecycle(vp);
return;
}
@@ -4673,7 +4673,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
dmu_tx_commit(tx);
}
}
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
}
@@ -5823,10 +5823,10 @@ zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
if (vn_need_pageq_flush(vp))
return (1);
- if (!rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER))
+ if (!ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs))
return (1);
need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
return (need);
}
@@ -5857,12 +5857,12 @@ zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
* zfs_znode_dmu_fini in zfsvfs_teardown during
* force unmount.
*/
- rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+ ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs);
if (zp->z_sa_hdl == NULL)
zfs_znode_free(zp);
else
zfs_zinactive(zp);
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
vp->v_data = NULL;
return (0);
diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c
index 76e24b1bdf51..653f42239df9 100644
--- a/module/os/freebsd/zfs/zfs_znode.c
+++ b/module/os/freebsd/zfs/zfs_znode.c
@@ -384,7 +384,7 @@ zfs_znode_dmu_fini(znode_t *zp)
{
ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
zp->z_unlinked ||
- RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
+ ZFS_TEARDOWN_INACTIVE_WLOCKED(zp->z_zfsvfs));
sa_handle_destroy(zp->z_sa_hdl);
zp->z_sa_hdl = NULL;
diff --git a/module/os/linux/spl/spl-condvar.c b/module/os/linux/spl/spl-condvar.c
index 9d045e3e8a66..49f48664503a 100644
--- a/module/os/linux/spl/spl-condvar.c
+++ b/module/os/linux/spl/spl-condvar.c
@@ -198,6 +198,18 @@ __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
}
EXPORT_SYMBOL(__cv_wait_sig);
+/*
+ * Wait on cvp in TASK_INTERRUPTIBLE state with every signal blocked.
+ * The caller's signal mask is saved before the wait and restored after it.
+ */
+void
+__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
+{
+	sigset_t all_sigs, prev_mask;
+
+	/* Block everything so a signal can not end the wait. */
+	sigfillset(&all_sigs);
+	(void) sigprocmask(SIG_BLOCK, &all_sigs, &prev_mask);
+
+	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
+
+	(void) sigprocmask(SIG_SETMASK, &prev_mask, NULL);
+}
+EXPORT_SYMBOL(__cv_wait_idle);
+
#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define spl_io_schedule_timeout(t) io_schedule_timeout(t)
#else
@@ -330,6 +342,21 @@ __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
}
EXPORT_SYMBOL(__cv_timedwait_sig);
+/*
+ * Timed wait on cvp in TASK_INTERRUPTIBLE state with every signal blocked.
+ * The caller's signal mask is restored before returning the result of
+ * __cv_timedwait_common().
+ */
+int
+__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
+{
+	sigset_t all_sigs, prev_mask;
+	int ret;
+
+	/* Block everything so a signal can not end the wait. */
+	sigfillset(&all_sigs);
+	(void) sigprocmask(SIG_BLOCK, &all_sigs, &prev_mask);
+
+	ret = __cv_timedwait_common(cvp, mp, exp_time,
+	    TASK_INTERRUPTIBLE, 0);
+
+	(void) sigprocmask(SIG_SETMASK, &prev_mask, NULL);
+
+	return (ret);
+}
+EXPORT_SYMBOL(__cv_timedwait_idle);
/*
* 'expire_time' argument is an absolute clock time in nanoseconds.
* Return value is time left (expire_time - now) or -1 if timeout occurred.
@@ -427,6 +454,23 @@ cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);
+/*
+ * High-resolution timed wait on cvp in TASK_INTERRUPTIBLE state with
+ * every signal blocked.  The caller's signal mask is restored before
+ * returning the result of cv_timedwait_hires_common().
+ */
+int
+cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+    hrtime_t res, int flag)
+{
+	sigset_t all_sigs, prev_mask;
+	int ret;
+
+	/* Block everything so a signal can not end the wait. */
+	sigfillset(&all_sigs);
+	(void) sigprocmask(SIG_BLOCK, &all_sigs, &prev_mask);
+
+	ret = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
+	    TASK_INTERRUPTIBLE);
+
+	(void) sigprocmask(SIG_SETMASK, &prev_mask, NULL);
+
+	return (ret);
+}
+EXPORT_SYMBOL(cv_timedwait_idle_hires);
+
+
void
__cv_signal(kcondvar_t *cvp)
{
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 5a2245436c72..85daef43be40 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -34,6 +34,7 @@
#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
+#include <linux/blkpg.h>
#include <linux/msdos_fs.h>
#include <linux/vfs_compat.h>
@@ -175,7 +176,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
/*
* Reopen the device if it is currently open. When expanding a
- * partition force re-scanning the partition table while closed
+ * partition force re-scanning the partition table if userland
+ * did not take care of this already. We need to do this while closed
* in order to get an accurate updated block device size. Then
* since udev may need to recreate the device links increase the
* open retry timeout before reporting the device as unavailable.
@@ -192,7 +194,23 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
if (bdev) {
if (v->vdev_expanding && bdev != bdev->bd_contains) {
bdevname(bdev->bd_contains, disk_name + 5);
- reread_part = B_TRUE;
+ /*
+ * If userland has BLKPG_RESIZE_PARTITION,
+ * then it should have updated the partition
+ * table already. We can detect this by
+ * comparing our current physical size
+ * with that of the device. If they are
+ * the same, then we must not have
+ * BLKPG_RESIZE_PARTITION or it failed to
+ * update the partition table online. We
+ * fallback to rescanning the partition
+ * table from the kernel below. However,
+ * if the capacity already reflects the
+ * updated partition, then we skip
+ * rescanning the partition table here.
+ */
+ if (v->vdev_psize == bdev_capacity(bdev))
+ reread_part = B_TRUE;
}
blkdev_put(bdev, mode | FMODE_EXCL);
diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c
index a4e71ca40788..423ce858144c 100644
--- a/module/os/linux/zfs/vdev_file.c
+++ b/module/os/linux/zfs/vdev_file.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -45,6 +45,17 @@
static taskq_t *vdev_file_taskq;
+/*
+ * By default, the logical/physical ashift for file vdevs is set to
+ * SPA_MINBLOCKSHIFT (9). This allows all file vdevs to use 512B (1 << 9)
+ * blocksizes. Users may opt to change one or both of these for testing
+ * or performance reasons. Care should be taken as these values will
+ * impact the vdev_ashift setting which can only be set at vdev creation
+ * time.
+ */
+unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+
static void
vdev_file_hold(vdev_t *vd)
{
@@ -159,8 +170,8 @@ skip_open:
}
*max_psize = *psize = zfa.zfa_size;
- *logical_ashift = SPA_MINBLOCKSHIFT;
- *physical_ashift = SPA_MINBLOCKSHIFT;
+ *logical_ashift = vdev_file_logical_ashift;
+ *physical_ashift = vdev_file_physical_ashift;
return (0);
}
@@ -346,3 +357,8 @@ vdev_ops_t vdev_disk_ops = {
};
#endif
+
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+ "Logical ashift for file-based devices");
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+ "Physical ashift for file-based devices");
diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c
index 8d79878c0458..11b5559321ad 100644
--- a/module/os/linux/zfs/zfs_acl.c
+++ b/module/os/linux/zfs/zfs_acl.c
@@ -1153,7 +1153,7 @@ zfs_acl_chown_setattr(znode_t *zp)
int error;
zfs_acl_t *aclp;
- if (ZTOZSB(zp)->z_acl_type == ZFS_ACLTYPE_POSIXACL)
+ if (ZTOZSB(zp)->z_acl_type == ZFS_ACLTYPE_POSIX)
return (0);
ASSERT(MUTEX_HELD(&zp->z_lock));
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 389200b52127..15ec7b91b001 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -356,9 +356,9 @@ acltype_changed_cb(void *arg, uint64_t newval)
zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
break;
- case ZFS_ACLTYPE_POSIXACL:
+ case ZFS_ACLTYPE_POSIX:
#ifdef CONFIG_FS_POSIX_ACL
- zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIXACL;
+ zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIX;
zfsvfs->z_sb->s_flags |= SB_POSIXACL;
#else
zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c
index 333c647466cc..9db8bda4cc66 100644
--- a/module/os/linux/zfs/zpl_super.c
+++ b/module/os/linux/zfs/zpl_super.c
@@ -187,10 +187,12 @@ __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
{
char *fsname;
+ ZFS_ENTER(zfsvfs);
fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dmu_objset_name(zfsvfs->z_os, fsname);
seq_puts(seq, fsname);
kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -209,7 +211,7 @@ __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
#ifdef CONFIG_FS_POSIX_ACL
switch (zfsvfs->z_acl_type) {
- case ZFS_ACLTYPE_POSIXACL:
+ case ZFS_ACLTYPE_POSIX:
seq_puts(seq, ",posixacl");
break;
default:
@@ -272,8 +274,12 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
* a txg sync. If the dsl_pool lock is held over sget()
* this can prevent the pool sync and cause a deadlock.
*/
+ dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
dsl_pool_rele(dmu_objset_pool(os), FTAG);
+
s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
+
+ dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
dsl_dataset_rele(dmu_objset_ds(os), FTAG);
if (IS_ERR(s))
diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c
index fa3c036405b0..9b5fd0fd397b 100644
--- a/module/os/linux/zfs/zpl_xattr.c
+++ b/module/os/linux/zfs/zpl_xattr.c
@@ -1058,7 +1058,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir)
struct posix_acl *acl = NULL;
int error = 0;
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (0);
if (!S_ISLNK(ip->i_mode)) {
@@ -1103,7 +1103,7 @@ zpl_chmod_acl(struct inode *ip)
struct posix_acl *acl;
int error;
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (0);
if (S_ISLNK(ip->i_mode))
@@ -1129,7 +1129,7 @@ __zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (0);
if (list && xattr_size <= list_size)
@@ -1146,7 +1146,7 @@ __zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (0);
if (list && xattr_size <= list_size)
@@ -1168,7 +1168,7 @@ __zpl_xattr_acl_get_access(struct inode *ip, const char *name,
if (strcmp(name, "") != 0)
return (-EINVAL);
#endif
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
acl = zpl_get_acl(ip, type);
@@ -1196,7 +1196,7 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
if (strcmp(name, "") != 0)
return (-EINVAL);
#endif
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
acl = zpl_get_acl(ip, type);
@@ -1224,7 +1224,7 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
if (strcmp(name, "") != 0)
return (-EINVAL);
#endif
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
if (!inode_owner_or_capable(ip))
@@ -1264,7 +1264,7 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
if (strcmp(name, "") != 0)
return (-EINVAL);
#endif
- if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
if (!inode_owner_or_capable(ip))
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 837b8ae71b34..f3dbbc15d25e 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -253,9 +253,10 @@ zfs_prop_init(void)
static zprop_index_t acltype_table[] = {
{ "off", ZFS_ACLTYPE_OFF },
- { "disabled", ZFS_ACLTYPE_OFF },
- { "noacl", ZFS_ACLTYPE_OFF },
- { "posixacl", ZFS_ACLTYPE_POSIXACL },
+ { "posix", ZFS_ACLTYPE_POSIX },
+ { "disabled", ZFS_ACLTYPE_OFF }, /* bkwrd compatibility */
+ { "noacl", ZFS_ACLTYPE_OFF }, /* bkwrd compatibility */
+ { "posixacl", ZFS_ACLTYPE_POSIX }, /* bkwrd compatibility */
{ NULL }
};
@@ -430,7 +431,7 @@ zfs_prop_init(void)
#ifndef __FreeBSD__
zprop_register_index(ZFS_PROP_ACLTYPE, "acltype", ZFS_ACLTYPE_OFF,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "noacl | posixacl", "ACLTYPE", acltype_table);
+ "off | posix", "ACLTYPE", acltype_table);
#endif
zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
@@ -705,7 +706,7 @@ zfs_prop_init(void)
zprop_register_impl(ZFS_PROP_ACLTYPE, "acltype", PROP_TYPE_INDEX,
ZFS_ACLTYPE_OFF, NULL, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "noacl | posixacl", "ACLTYPE", B_FALSE, B_FALSE, acltype_table);
+ "off | posix", "ACLTYPE", B_FALSE, B_FALSE, acltype_table);
#endif
zprop_register_hidden(ZFS_PROP_REMAPTXG, "remaptxg", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "REMAPTXG");
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 904c325f37a1..7a499298f75c 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
- * Copyright (c) 2011, 2019, Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020, Delphix. All rights reserved.
* Copyright (c) 2014, Saso Kiselkov. All rights reserved.
* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
@@ -895,6 +895,12 @@ static void l2arc_read_done(zio_t *);
static void l2arc_do_free_on_write(void);
/*
+ * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
+ * metadata and data are cached from ARC into L2ARC.
+ */
+int l2arc_mfuonly = 0;
+
+/*
* L2ARC TRIM
* l2arc_trim_ahead : A ZFS module parameter that controls how much ahead of
* the current write size (l2arc_write_max) we should TRIM if we
@@ -2188,7 +2194,7 @@ arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
ret = SET_ERROR(EIO);
spa_log_error(spa, zb);
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
- spa, NULL, zb, NULL, 0, 0);
+ spa, NULL, zb, NULL, 0);
}
return (ret);
@@ -5654,7 +5660,7 @@ arc_read_done(zio_t *zio)
spa_log_error(zio->io_spa, &acb->acb_zb);
(void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION,
- zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0);
+ zio->io_spa, NULL, &acb->acb_zb, zio, 0);
}
}
@@ -5931,7 +5937,7 @@ top:
spa_log_error(spa, zb);
(void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION,
- spa, NULL, zb, NULL, 0, 0);
+ spa, NULL, zb, NULL, 0);
}
}
if (rc != 0) {
@@ -8909,6 +8915,15 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
* Copy buffers for L2ARC writing.
*/
for (int try = 0; try < L2ARC_FEED_TYPES; try++) {
+ /*
+ * If try == 1 or 3, we would cache MRU metadata and data
+ * respectively, so skip those passes when l2arc_mfuonly is set.
+ */
+ if (l2arc_mfuonly) {
+ if (try == 1 || try == 3)
+ continue;
+ }
+
multilist_sublist_t *mls = l2arc_sublist_lock(try);
uint64_t passed_sz = 0;
@@ -9174,7 +9189,7 @@ l2arc_feed_thread(void *unused)
cookie = spl_fstrans_mark();
while (l2arc_thread_exit == 0) {
CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait_sig(&l2arc_feed_thr_cv,
+ (void) cv_timedwait_idle(&l2arc_feed_thr_cv,
&l2arc_feed_thr_lock, next);
CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock);
next = ddi_get_lbolt() + hz;
@@ -9291,8 +9306,6 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
ASSERT(!l2arc_vdev_present(vd));
- vdev_ashift_optimize(vd);
-
/*
* Create a new l2arc device entry.
*/
@@ -10562,6 +10575,9 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
"Min size in bytes to write rebuild log blocks in L2ARC");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
+ "Cache only MFU data from ARC into L2ARC");
+
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 2de1f4e4c267..7d817320aae4 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -718,7 +718,7 @@ dbuf_evict_thread(void *unused)
while (!dbuf_evict_thread_exit) {
while (!dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) {
CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait_sig_hires(&dbuf_evict_cv,
+ (void) cv_timedwait_idle_hires(&dbuf_evict_cv,
&dbuf_evict_lock, SEC2NSEC(1), MSEC2NSEC(1), 0);
CALLB_CPR_SAFE_END(&cpr, &dbuf_evict_lock);
}
diff --git a/module/zfs/dmu_redact.c b/module/zfs/dmu_redact.c
index df10d8d6faae..c53fba75cc51 100644
--- a/module/zfs/dmu_redact.c
+++ b/module/zfs/dmu_redact.c
@@ -568,8 +568,7 @@ commit_rl_updates(objset_t *os, struct merge_data *md, uint64_t object,
uint64_t txg = dmu_tx_get_txg(tx);
if (!md->md_synctask_txg[txg & TXG_MASK]) {
dsl_sync_task_nowait(dmu_tx_pool(tx),
- redaction_list_update_sync, md, 5, ZFS_SPACE_CHECK_NONE,
- tx);
+ redaction_list_update_sync, md, tx);
md->md_synctask_txg[txg & TXG_MASK] = B_TRUE;
md->md_latest_synctask_txg = txg;
}
@@ -1007,10 +1006,14 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
objset_t *os;
struct redact_thread_arg *args = NULL;
redaction_list_t *new_rl = NULL;
+ char *newredactbook;
if ((err = dsl_pool_hold(snapname, FTAG, &dp)) != 0)
return (err);
+ newredactbook = kmem_zalloc(sizeof (char) * ZFS_MAX_DATASET_NAME_LEN,
+ KM_SLEEP);
+
if ((err = dsl_dataset_hold_flags(dp, snapname, DS_HOLD_FLAG_DECRYPT,
FTAG, &ds)) != 0) {
goto out;
@@ -1064,7 +1067,6 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
goto out;
boolean_t resuming = B_FALSE;
- char newredactbook[ZFS_MAX_DATASET_NAME_LEN];
zfs_bookmark_phys_t bookmark;
(void) strlcpy(newredactbook, snapname, ZFS_MAX_DATASET_NAME_LEN);
@@ -1074,6 +1076,10 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
"#%s", redactbook);
if (n >= ZFS_MAX_DATASET_NAME_LEN - (c - newredactbook)) {
dsl_pool_rele(dp, FTAG);
+ kmem_free(newredactbook,
+ sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
+ if (args != NULL)
+ kmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
@@ -1146,16 +1152,23 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
(void) thread_create(NULL, 0, redact_traverse_thread, rta,
0, curproc, TS_RUN, minclsyspri);
}
- struct redact_merge_thread_arg rmta = { { {0} } };
- (void) bqueue_init(&rmta.q, zfs_redact_queue_ff,
+
+ struct redact_merge_thread_arg *rmta;
+ rmta = kmem_zalloc(sizeof (struct redact_merge_thread_arg), KM_SLEEP);
+
+ (void) bqueue_init(&rmta->q, zfs_redact_queue_ff,
zfs_redact_queue_length, offsetof(struct redact_record, ln));
- rmta.numsnaps = numsnaps;
- rmta.spa = os->os_spa;
- rmta.thr_args = args;
- (void) thread_create(NULL, 0, redact_merge_thread, &rmta, 0, curproc,
+ rmta->numsnaps = numsnaps;
+ rmta->spa = os->os_spa;
+ rmta->thr_args = args;
+ (void) thread_create(NULL, 0, redact_merge_thread, rmta, 0, curproc,
TS_RUN, minclsyspri);
- err = perform_redaction(os, new_rl, &rmta);
+ err = perform_redaction(os, new_rl, rmta);
+ kmem_free(rmta, sizeof (struct redact_merge_thread_arg));
+
out:
+ kmem_free(newredactbook, sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
+
if (new_rl != NULL) {
dsl_redaction_list_long_rele(new_rl, FTAG);
dsl_redaction_list_rele(new_rl, FTAG);
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 00536f2774e7..30d20bfefa12 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1197,7 +1197,7 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
dnode_t *dn;
zrl_init(&dnh->dnh_zrlock);
- zrl_tryenter(&dnh->dnh_zrlock);
+ VERIFY3U(1, ==, zrl_tryenter(&dnh->dnh_zrlock));
dn = dnode_create(os, dnp, NULL, object, dnh);
DNODE_VERIFY(dn);
@@ -1949,18 +1949,20 @@ static void
dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_tx_t *tx)
{
- dmu_buf_impl_t db_search;
+ dmu_buf_impl_t *db_search;
dmu_buf_impl_t *db;
avl_index_t where;
+ db_search = kmem_zalloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
+
mutex_enter(&dn->dn_dbufs_mtx);
- db_search.db_level = 1;
- db_search.db_blkid = start_blkid + 1;
- db_search.db_state = DB_SEARCH;
+ db_search->db_level = 1;
+ db_search->db_blkid = start_blkid + 1;
+ db_search->db_state = DB_SEARCH;
for (;;) {
- db = avl_find(&dn->dn_dbufs, &db_search, &where);
+ db = avl_find(&dn->dn_dbufs, db_search, &where);
if (db == NULL)
db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
@@ -1972,7 +1974,7 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
/*
* Setup the next blkid we want to search for.
*/
- db_search.db_blkid = db->db_blkid + 1;
+ db_search->db_blkid = db->db_blkid + 1;
ASSERT3U(db->db_blkid, >=, start_blkid);
/*
@@ -1992,10 +1994,10 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
/*
* Walk all the in-core level-1 dbufs and verify they have been dirtied.
*/
- db_search.db_level = 1;
- db_search.db_blkid = start_blkid + 1;
- db_search.db_state = DB_SEARCH;
- db = avl_find(&dn->dn_dbufs, &db_search, &where);
+ db_search->db_level = 1;
+ db_search->db_blkid = start_blkid + 1;
+ db_search->db_state = DB_SEARCH;
+ db = avl_find(&dn->dn_dbufs, db_search, &where);
if (db == NULL)
db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
for (; db != NULL; db = AVL_NEXT(&dn->dn_dbufs, db)) {
@@ -2005,6 +2007,7 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
ASSERT(db->db_dirtycnt > 0);
}
#endif
+ kmem_free(db_search, sizeof (dmu_buf_impl_t));
mutex_exit(&dn->dn_dbufs_mtx);
}
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 712af664e90f..0ebda2f77074 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -23,6 +23,7 @@
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2016 Gary Mills
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
*/
diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c
index 2d6ca8549eb9..148e8fff2437 100644
--- a/module/zfs/dsl_synctask.c
+++ b/module/zfs/dsl_synctask.c
@@ -170,15 +170,13 @@ dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc,
static void
dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
- int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx,
- boolean_t early)
+ dmu_tx_t *tx, boolean_t early)
{
dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
dst->dst_pool = dp;
dst->dst_txg = dmu_tx_get_txg(tx);
- dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
- dst->dst_space_check = space_check;
+ dst->dst_space_check = ZFS_SPACE_CHECK_NONE;
dst->dst_checkfunc = dsl_null_checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg = arg;
@@ -192,18 +190,16 @@ dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
void
dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
- int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx)
+ dmu_tx_t *tx)
{
- dsl_sync_task_nowait_common(dp, syncfunc, arg,
- blocks_modified, space_check, tx, B_FALSE);
+ dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_FALSE);
}
void
dsl_early_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
- int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx)
+ dmu_tx_t *tx)
{
- dsl_sync_task_nowait_common(dp, syncfunc, arg,
- blocks_modified, space_check, tx, B_TRUE);
+ dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_TRUE);
}
/*
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index c00e08b8d02a..a5003f85d621 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -104,13 +104,15 @@ struct erpt_kstat {
kstat_named_t erpt_set_failed; /* num erpt set failures */
kstat_named_t fmri_set_failed; /* num fmri set failures */
kstat_named_t payload_set_failed; /* num payload set failures */
+ kstat_named_t erpt_duplicates; /* num duplicate erpts */
};
static struct erpt_kstat erpt_kstat_data = {
{ "erpt-dropped", KSTAT_DATA_UINT64 },
{ "erpt-set-failed", KSTAT_DATA_UINT64 },
{ "fmri-set-failed", KSTAT_DATA_UINT64 },
- { "payload-set-failed", KSTAT_DATA_UINT64 }
+ { "payload-set-failed", KSTAT_DATA_UINT64 },
+ { "erpt-duplicates", KSTAT_DATA_UINT64 }
};
kstat_t *fm_ksp;
@@ -568,6 +570,12 @@ out:
return (error);
}
+void
+zfs_zevent_track_duplicate(void)
+{
+ atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
+}
+
static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
@@ -1633,6 +1641,8 @@ fm_init(void)
list_create(&zevent_list, sizeof (zevent_t),
offsetof(zevent_t, ev_node));
cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
+
+ zfs_ereport_init();
}
void
@@ -1640,6 +1650,8 @@ fm_fini(void)
{
int count;
+ zfs_ereport_fini();
+
zfs_zevent_drain_all(&count);
mutex_enter(&zevent_lock);
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index ccc247d1557a..133005b227e5 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
index 4170d7e03ebd..99852521b6d1 100644
--- a/module/zfs/mmp.c
+++ b/module/zfs/mmp.c
@@ -198,14 +198,6 @@ mmp_init(spa_t *spa)
cv_init(&mmp->mmp_thread_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&mmp->mmp_io_lock, NULL, MUTEX_DEFAULT, NULL);
mmp->mmp_kstat_id = 1;
-
- /*
- * mmp_write_done() calculates mmp_delay based on prior mmp_delay and
- * the elapsed time since the last write. For the first mmp write,
- * there is no "last write", so we start with fake non-zero values.
- */
- mmp->mmp_last_write = gethrtime();
- mmp->mmp_delay = MSEC2NSEC(MMP_INTERVAL_OK(zfs_multihost_interval));
}
void
@@ -557,6 +549,18 @@ mmp_thread(void *arg)
mmp_thread_enter(mmp, &cpr);
+ /*
+ * There have been no MMP writes yet. Setting mmp_last_write here gives
+ * us one mmp_fail_ns period, which is consistent with the activity
+ * check duration, to try to land an MMP write before MMP suspends the
+ * pool (if so configured).
+ */
+
+ mutex_enter(&mmp->mmp_io_lock);
+ mmp->mmp_last_write = gethrtime();
+ mmp->mmp_delay = MSEC2NSEC(MMP_INTERVAL_OK(zfs_multihost_interval));
+ mutex_exit(&mmp->mmp_io_lock);
+
while (!mmp->mmp_thread_exiting) {
hrtime_t next_time = gethrtime() +
MSEC2NSEC(MMP_DEFAULT_INTERVAL);
@@ -671,7 +675,7 @@ mmp_thread(void *arg)
}
CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv,
+ (void) cv_timedwait_idle_hires(&mmp->mmp_thread_cv,
&mmp->mmp_thread_lock, next_time, USEC2NSEC(100),
CALLOUT_FLAG_ABSOLUTE);
CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c
index 2c0e4b860a04..2ce0139c9137 100644
--- a/module/zfs/range_tree.c
+++ b/module/zfs/range_tree.c
@@ -24,6 +24,7 @@
*/
/*
* Copyright (c) 2013, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index aac469f44b59..532f04b91ca1 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -1000,13 +1000,25 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
/*
* The write issue taskq can be extremely CPU
* intensive. Run it at slightly less important
- * priority than the other taskqs. Under Linux this
- * means incrementing the priority value on platforms
- * like illumos it should be decremented.
+ * priority than the other taskqs.
+ *
+ * Under Linux and FreeBSD this means incrementing
+ * the priority value as opposed to platforms like
+ * illumos where it should be decremented.
+ *
+ * On FreeBSD, if priorities divided by four (RQ_PPQ)
+ * are equal then a difference between them is
+ * insignificant.
*/
- if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
+ if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) {
+#if defined(__linux__)
pri++;
-
+#elif defined(__FreeBSD__)
+ pri += 4;
+#else
+#error "unknown OS"
+#endif
+ }
tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
}
@@ -2485,11 +2497,12 @@ spa_livelist_delete_cb(void *arg, zthr_t *z)
VERIFY0(dsl_get_next_livelist_obj(mos, zap_obj, &ll_obj));
VERIFY0(zap_count(mos, ll_obj, &count));
if (count > 0) {
- dsl_deadlist_t ll = { 0 };
+ dsl_deadlist_t *ll;
dsl_deadlist_entry_t *dle;
bplist_t to_free;
- dsl_deadlist_open(&ll, mos, ll_obj);
- dle = dsl_deadlist_first(&ll);
+ ll = kmem_zalloc(sizeof (dsl_deadlist_t), KM_SLEEP);
+ dsl_deadlist_open(ll, mos, ll_obj);
+ dle = dsl_deadlist_first(ll);
ASSERT3P(dle, !=, NULL);
bplist_create(&to_free);
int err = dsl_process_sub_livelist(&dle->dle_bpobj, &to_free,
@@ -2497,7 +2510,7 @@ spa_livelist_delete_cb(void *arg, zthr_t *z)
if (err == 0) {
sublist_delete_arg_t sync_arg = {
.spa = spa,
- .ll = &ll,
+ .ll = ll,
.key = dle->dle_mintxg,
.to_free = &to_free
};
@@ -2512,7 +2525,8 @@ spa_livelist_delete_cb(void *arg, zthr_t *z)
}
bplist_clear(&to_free);
bplist_destroy(&to_free);
- dsl_deadlist_close(&ll);
+ dsl_deadlist_close(ll);
+ kmem_free(ll, sizeof (dsl_deadlist_t));
} else {
livelist_delete_arg_t sync_arg = {
.spa = spa,
@@ -2688,8 +2702,7 @@ spa_livelist_condense_cb(void *arg, zthr_t *t)
lca->first_size = first_size;
lca->next_size = next_size;
dsl_sync_task_nowait(spa_get_dsl(spa),
- spa_livelist_condense_sync, lca, 0,
- ZFS_SPACE_CHECK_NONE, tx);
+ spa_livelist_condense_sync, lca, tx);
dmu_tx_commit(tx);
return;
}
@@ -2869,7 +2882,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
}
if (error != EBADF) {
(void) zfs_ereport_post(ereport, spa,
- NULL, NULL, NULL, 0, 0);
+ NULL, NULL, NULL, 0);
}
}
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
@@ -5749,7 +5762,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
for (int c = 0; error == 0 && c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
- vdev_ashift_optimize(vd);
vdev_metaslab_set_size(vd);
vdev_expand(vd, txg);
}
diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c
index b98b7badbae1..dacba127dcfa 100644
--- a/module/zfs/spa_config.c
+++ b/module/zfs/spa_config.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright 2017 Joyent, Inc.
*/
@@ -316,7 +316,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
if (target->spa_ccw_fail_time == 0) {
(void) zfs_ereport_post(
FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
- target, NULL, NULL, NULL, 0, 0);
+ target, NULL, NULL, NULL, 0);
}
target->spa_ccw_fail_time = gethrtime();
spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
@@ -577,10 +577,8 @@ spa_config_update(spa_t *spa, int what)
(tvd->vdev_islog && tvd->vdev_removing))
continue;
- if (tvd->vdev_ms_array == 0) {
- vdev_ashift_optimize(tvd);
+ if (tvd->vdev_ms_array == 0)
vdev_metaslab_set_size(tvd);
- }
vdev_expand(tvd, txg);
}
}
diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c
index f47adb94d55b..2ab58815400a 100644
--- a/module/zfs/spa_history.c
+++ b/module/zfs/spa_history.c
@@ -397,8 +397,7 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
/* Kick this off asynchronously; errors are ignored. */
- dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
- nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
+ dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, tx);
dmu_tx_commit(tx);
/* spa_history_log_sync will free nvl */
@@ -532,7 +531,7 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
spa_history_log_sync(nvl, tx);
} else {
dsl_sync_task_nowait(spa_get_dsl(spa),
- spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx);
+ spa_history_log_sync, nvl, tx);
}
/* spa_history_log_sync() will free nvl */
}
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index a5f2b041737b..65375b579da6 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -242,16 +242,11 @@ txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time)
{
CALLB_CPR_SAFE_BEGIN(cpr);
- /*
- * cv_wait_sig() is used instead of cv_wait() in order to prevent
- * this process from incorrectly contributing to the system load
- * average when idle.
- */
if (time) {
- (void) cv_timedwait_sig(cv, &tx->tx_sync_lock,
+ (void) cv_timedwait_idle(cv, &tx->tx_sync_lock,
ddi_get_lbolt() + time);
} else {
- cv_wait_sig(cv, &tx->tx_sync_lock);
+ cv_wait_idle(cv, &tx->tx_sync_lock);
}
CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock);
@@ -760,7 +755,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce)
if (should_quiesce == B_TRUE) {
cv_wait_io(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
} else {
- cv_wait_sig(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
+ cv_wait_idle(&tx->tx_quiesce_done_cv,
+ &tx->tx_sync_lock);
}
}
mutex_exit(&tx->tx_sync_lock);
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 95a2f5947db1..a94101485c94 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1481,7 +1481,7 @@ vdev_probe_done(zio_t *zio)
ASSERT(zio->io_error != 0);
vdev_dbgmsg(vd, "failed probe");
(void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
- spa, vd, NULL, NULL, 0, 0);
+ spa, vd, NULL, NULL, 0);
zio->io_error = SET_ERROR(ENXIO);
}
@@ -1673,6 +1673,38 @@ vdev_set_deflate_ratio(vdev_t *vd)
}
/*
+ * Maximize performance by inflating the configured ashift for top level
+ * vdevs to be as close to the physical ashift as possible while maintaining
+ * administrator defined limits and ensuring it doesn't go below the
+ * logical ashift.
+ */
+static void
+vdev_ashift_optimize(vdev_t *vd)
+{
+ ASSERT(vd == vd->vdev_top);
+
+ if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+ vd->vdev_ashift = MIN(
+ MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
+ MAX(zfs_vdev_min_auto_ashift,
+ vd->vdev_physical_ashift));
+ } else {
+ /*
+ * If the logical and physical ashifts are the same, then
+ * we ensure that the top-level vdev's ashift is not smaller
+ * than our minimum ashift value. For the unusual case
+ * where logical ashift > physical ashift, we can't cap
+ * the calculated ashift based on max ashift as that
+ * would cause failures.
+ * We still check if we need to increase it to match
+ * the min ashift.
+ */
+ vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift,
+ vd->vdev_ashift);
+ }
+}
+
+/*
* Prepare a virtual device for access.
*/
int
@@ -1830,16 +1862,17 @@ vdev_open(vdev_t *vd)
return (SET_ERROR(EINVAL));
}
+ /*
+ * We can always set the logical/physical ashift members since
+ * their values are only used to calculate the vdev_ashift when
+ * the device is first added to the config. These values should
+ * not be used for anything else since they may change whenever
+ * the device is reopened and we don't store them in the label.
+ */
vd->vdev_physical_ashift =
MAX(physical_ashift, vd->vdev_physical_ashift);
- vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift);
- vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift);
-
- if (vd->vdev_logical_ashift > ASHIFT_MAX) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_ASHIFT_TOO_BIG);
- return (SET_ERROR(EDOM));
- }
+ vd->vdev_logical_ashift = MAX(logical_ashift,
+ vd->vdev_logical_ashift);
if (vd->vdev_asize == 0) {
/*
@@ -1848,6 +1881,24 @@ vdev_open(vdev_t *vd)
*/
vd->vdev_asize = asize;
vd->vdev_max_asize = max_asize;
+
+ /*
+ * If the vdev_ashift was not overridden at creation time,
+ * then set it to the logical ashift and optimize the ashift.
+ */
+ if (vd->vdev_ashift == 0) {
+ vd->vdev_ashift = vd->vdev_logical_ashift;
+
+ if (vd->vdev_logical_ashift > ASHIFT_MAX) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_ASHIFT_TOO_BIG);
+ return (SET_ERROR(EDOM));
+ }
+
+ if (vd->vdev_top == vd) {
+ vdev_ashift_optimize(vd);
+ }
+ }
if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
vd->vdev_ashift > ASHIFT_MAX)) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
@@ -1862,11 +1913,10 @@ vdev_open(vdev_t *vd)
vd->vdev_ops->vdev_op_leaf) {
(void) zfs_ereport_post(
FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
- spa, vd, NULL, NULL, 0, 0);
+ spa, vd, NULL, NULL, 0);
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_BAD_LABEL);
return (SET_ERROR(EDOM));
-
}
vd->vdev_max_asize = max_asize;
}
@@ -2445,35 +2495,6 @@ vdev_metaslab_set_size(vdev_t *vd)
ASSERT3U(vd->vdev_ms_shift, >=, SPA_MAXBLOCKSHIFT);
}
-/*
- * Maximize performance by inflating the configured ashift for top level
- * vdevs to be as close to the physical ashift as possible while maintaining
- * administrator defined limits and ensuring it doesn't go below the
- * logical ashift.
- */
-void
-vdev_ashift_optimize(vdev_t *vd)
-{
- if (vd == vd->vdev_top) {
- if (vd->vdev_ashift < vd->vdev_physical_ashift) {
- vd->vdev_ashift = MIN(
- MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
- MAX(zfs_vdev_min_auto_ashift,
- vd->vdev_physical_ashift));
- } else {
- /*
- * Unusual case where logical ashift > physical ashift
- * so we can't cap the calculated ashift based on max
- * ashift as that would cause failures.
- * We still check if we need to increase it to match
- * the min ashift.
- */
- vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift,
- vd->vdev_ashift);
- }
- }
-}
-
void
vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
{
@@ -4759,7 +4780,7 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
}
(void) zfs_ereport_post(class, spa, vd, NULL, NULL,
- save_state, 0);
+ save_state);
}
/* Erase any notion of persistent removed state */
diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c
index 6bc2d917d59c..12ee393bd5db 100644
--- a/module/zfs/vdev_indirect.c
+++ b/module/zfs/vdev_indirect.c
@@ -16,7 +16,7 @@
/*
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
- * Copyright (c) 2014, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2020 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -576,8 +576,7 @@ spa_condense_indirect_commit_entry(spa_t *spa,
*/
if (list_is_empty(&sci->sci_new_mapping_entries[txgoff])) {
dsl_sync_task_nowait(dmu_tx_pool(tx),
- spa_condense_indirect_commit_sync, sci,
- 0, ZFS_SPACE_CHECK_NONE, tx);
+ spa_condense_indirect_commit_sync, sci, tx);
}
vdev_indirect_mapping_entry_t *vime =
@@ -1474,13 +1473,14 @@ vdev_indirect_all_checksum_errors(zio_t *zio)
vdev_t *vd = ic->ic_vdev;
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&vd->vdev_stat_lock);
-
- (void) zfs_ereport_post_checksum(zio->io_spa, vd,
+ int ret = zfs_ereport_post_checksum(zio->io_spa, vd,
NULL, zio, is->is_target_offset, is->is_size,
NULL, NULL, NULL);
+ if (ret != EALREADY) {
+ mutex_enter(&vd->vdev_stat_lock);
+ vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&vd->vdev_stat_lock);
+ }
}
}
}
diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c
index ab711441d9ca..7ff7fffcc80e 100644
--- a/module/zfs/vdev_initialize.c
+++ b/module/zfs/vdev_initialize.c
@@ -126,7 +126,7 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
- guid, 2, ZFS_SPACE_CHECK_NONE, tx);
+ guid, tx);
switch (new_state) {
case VDEV_INITIALIZE_ACTIVE:
@@ -216,8 +216,7 @@ vdev_initialize_write(vdev_t *vd, uint64_t start, uint64_t size, abd_t *data)
/* This is the first write of this txg. */
dsl_sync_task_nowait(spa_get_dsl(spa),
- vdev_initialize_zap_update_sync, guid, 2,
- ZFS_SPACE_CHECK_RESERVED, tx);
+ vdev_initialize_zap_update_sync, guid, tx);
}
/*
diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c
index 8c7468255565..7fab7d0d7950 100644
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@@ -149,6 +149,8 @@
#include <sys/dsl_scan.h>
#include <sys/abd.h>
#include <sys/fs/zfs.h>
+#include <sys/byteorder.h>
+#include <sys/zfs_bootenv.h>
/*
* Basic routines to read and write from a vdev label.
@@ -1233,13 +1235,9 @@ vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags)
* bootloader should have rewritten them all to be the same on boot,
* and any changes we made since boot have been the same across all
* labels.
- *
- * While grub supports writing to all four labels, other bootloaders
- * don't, so we only use the first two labels to store boot
- * information.
*/
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
- for (int l = 0; l < VDEV_LABELS / 2; l++) {
+ for (int l = 0; l < VDEV_LABELS; l++) {
vdev_label_read(zio, vd, l,
abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE),
offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE,
@@ -1249,14 +1247,15 @@ vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags)
}
int
-vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *command)
+vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv)
{
+ nvlist_t *config;
spa_t *spa = rvd->vdev_spa;
abd_t *abd = NULL;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
- ASSERT(command);
+ ASSERT(bootenv);
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
zio_t *zio = zio_root(spa, NULL, &abd, flags);
@@ -1264,39 +1263,81 @@ vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *command)
int err = zio_wait(zio);
if (abd != NULL) {
+ char *buf;
vdev_boot_envblock_t *vbe = abd_to_buf(abd);
- if (vbe->vbe_version != VB_RAW) {
- abd_free(abd);
- return (SET_ERROR(ENOTSUP));
+
+ vbe->vbe_version = ntohll(vbe->vbe_version);
+ switch (vbe->vbe_version) {
+ case VB_RAW:
+ /*
+ * if we have textual data in vbe_bootenv, create nvlist
+ * with key "envmap".
+ */
+ fnvlist_add_uint64(bootenv, BOOTENV_VERSION, VB_RAW);
+ vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';
+ fnvlist_add_string(bootenv, GRUB_ENVMAP,
+ vbe->vbe_bootenv);
+ break;
+
+ case VB_NVLIST:
+ err = nvlist_unpack(vbe->vbe_bootenv,
+ sizeof (vbe->vbe_bootenv), &config, 0);
+ if (err == 0) {
+ fnvlist_merge(bootenv, config);
+ nvlist_free(config);
+ break;
+ }
+ /* FALLTHROUGH */
+ default:
+ /* Check for FreeBSD zfs bootonce command string */
+ buf = abd_to_buf(abd);
+ if (*buf == '\0') {
+ fnvlist_add_uint64(bootenv, BOOTENV_VERSION,
+ VB_NVLIST);
+ break;
+ }
+ fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf);
}
- vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';
- fnvlist_add_string(command, "envmap", vbe->vbe_bootenv);
- /* abd was allocated in vdev_label_read_bootenv_impl() */
+
+ /*
+ * abd was allocated in vdev_label_read_bootenv_impl()
+ */
abd_free(abd);
- /* If we managed to read any successfully, return success. */
+ /*
+ * If we managed to read any successfully,
+ * return success.
+ */
return (0);
}
return (err);
}
int
-vdev_label_write_bootenv(vdev_t *vd, char *envmap)
+vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env)
{
zio_t *zio;
spa_t *spa = vd->vdev_spa;
vdev_boot_envblock_t *bootenv;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
- int error = ENXIO;
+ int error;
+ size_t nvsize;
+ char *nvbuf;
+
+ error = nvlist_size(env, &nvsize, NV_ENCODE_XDR);
+ if (error != 0)
+ return (SET_ERROR(error));
- if (strlen(envmap) >= sizeof (bootenv->vbe_bootenv)) {
+ if (nvsize >= sizeof (bootenv->vbe_bootenv)) {
return (SET_ERROR(E2BIG));
}
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+ error = ENXIO;
for (int c = 0; c < vd->vdev_children; c++) {
- int child_err = vdev_label_write_bootenv(vd->vdev_child[c],
- envmap);
+ int child_err;
+
+ child_err = vdev_label_write_bootenv(vd->vdev_child[c], env);
/*
* As long as any of the disks managed to write all of their
* labels successfully, return success.
@@ -1312,16 +1353,41 @@ vdev_label_write_bootenv(vdev_t *vd, char *envmap)
ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE);
abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
abd_zero(abd, VDEV_PAD_SIZE);
+
bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE);
+ nvbuf = bootenv->vbe_bootenv;
+ nvsize = sizeof (bootenv->vbe_bootenv);
+
+ bootenv->vbe_version = fnvlist_lookup_uint64(env, BOOTENV_VERSION);
+ switch (bootenv->vbe_version) {
+ case VB_RAW:
+ if (nvlist_lookup_string(env, GRUB_ENVMAP, &nvbuf) == 0) {
+ (void) strlcpy(bootenv->vbe_bootenv, nvbuf, nvsize);
+ }
+ error = 0;
+ break;
- char *buf = bootenv->vbe_bootenv;
- (void) strlcpy(buf, envmap, sizeof (bootenv->vbe_bootenv));
- bootenv->vbe_version = VB_RAW;
- abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE);
+ case VB_NVLIST:
+ error = nvlist_pack(env, &nvbuf, &nvsize, NV_ENCODE_XDR,
+ KM_SLEEP);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (error == 0) {
+ bootenv->vbe_version = htonll(bootenv->vbe_version);
+ abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE);
+ } else {
+ abd_free(abd);
+ return (SET_ERROR(error));
+ }
retry:
zio = zio_root(spa, NULL, NULL, flags);
- for (int l = 0; l < VDEV_LABELS / 2; l++) {
+ for (int l = 0; l < VDEV_LABELS; l++) {
vdev_label_write(zio, vd, l, abd,
offsetof(vdev_label_t, vl_be),
VDEV_PAD_SIZE, NULL, NULL, flags);
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
index 5e1060f127c9..71b5adbbd06a 100644
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@@ -391,7 +391,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
*physical_ashift = MAX(*physical_ashift,
- vd->vdev_physical_ashift);
+ cvd->vdev_physical_ashift);
}
if (numerrors == vd->vdev_children) {
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index 4320078b6f7c..47312e02f70a 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2016 Gvozden Nešković. All rights reserved.
*/
@@ -1790,16 +1790,17 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
zio_bad_cksum_t zbc;
raidz_map_t *rm = zio->io_vsd;
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&vd->vdev_stat_lock);
-
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
- (void) zfs_ereport_post_checksum(zio->io_spa, vd,
+ int ret = zfs_ereport_post_checksum(zio->io_spa, vd,
&zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
rc->rc_abd, bad_data, &zbc);
+ if (ret != EALREADY) {
+ mutex_enter(&vd->vdev_stat_lock);
+ vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&vd->vdev_stat_lock);
+ }
}
}
@@ -2279,21 +2280,21 @@ vdev_raidz_io_done(zio_t *zio)
vdev_t *cvd;
rc = &rm->rm_col[c];
cvd = vd->vdev_child[rc->rc_devidx];
- if (rc->rc_error == 0) {
- zio_bad_cksum_t zbc;
- zbc.zbc_has_cksum = 0;
- zbc.zbc_injected =
- rm->rm_ecksuminjected;
+ if (rc->rc_error != 0)
+ continue;
+ zio_bad_cksum_t zbc;
+ zbc.zbc_has_cksum = 0;
+ zbc.zbc_injected = rm->rm_ecksuminjected;
+
+ int ret = zfs_ereport_start_checksum(
+ zio->io_spa, cvd, &zio->io_bookmark, zio,
+ rc->rc_offset, rc->rc_size,
+ (void *)(uintptr_t)c, &zbc);
+ if (ret != EALREADY) {
mutex_enter(&cvd->vdev_stat_lock);
cvd->vdev_stat.vs_checksum_errors++;
mutex_exit(&cvd->vdev_stat_lock);
-
- zfs_ereport_start_checksum(
- zio->io_spa, cvd,
- &zio->io_bookmark, zio,
- rc->rc_offset, rc->rc_size,
- (void *)(uintptr_t)c, &zbc);
}
}
}
diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
index 85ed8afe1cf4..3362d608c037 100644
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@@ -267,7 +267,7 @@ vdev_rebuild_initiate(vdev_t *vd)
vd->vdev_rebuilding = B_TRUE;
dsl_sync_task_nowait(spa_get_dsl(spa), vdev_rebuild_initiate_sync,
- (void *)(uintptr_t)vd->vdev_id, 0, ZFS_SPACE_CHECK_NONE, tx);
+ (void *)(uintptr_t)vd->vdev_id, tx);
dmu_tx_commit(tx);
vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_START);
@@ -553,8 +553,7 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
vr->vr_scan_offset[txg & TXG_MASK] = start;
dsl_sync_task_nowait(spa_get_dsl(spa),
vdev_rebuild_update_sync,
- (void *)(uintptr_t)vd->vdev_id, 2,
- ZFS_SPACE_CHECK_RESERVED, tx);
+ (void *)(uintptr_t)vd->vdev_id, tx);
}
/* When exiting write out our progress. */
@@ -875,16 +874,14 @@ vdev_rebuild_thread(void *arg)
* by a pool checkpoint. See the dsl_scan_done() comments.
*/
dsl_sync_task_nowait(dp, vdev_rebuild_complete_sync,
- (void *)(uintptr_t)vd->vdev_id, 0,
- ZFS_SPACE_CHECK_NONE, tx);
+ (void *)(uintptr_t)vd->vdev_id, tx);
} else if (vd->vdev_rebuild_cancel_wanted) {
/*
* The rebuild operation was canceled. This will occur when
* a device participating in the rebuild is detached.
*/
dsl_sync_task_nowait(dp, vdev_rebuild_cancel_sync,
- (void *)(uintptr_t)vd->vdev_id, 0,
- ZFS_SPACE_CHECK_NONE, tx);
+ (void *)(uintptr_t)vd->vdev_id, tx);
} else if (vd->vdev_rebuild_reset_wanted) {
/*
* Reset the running rebuild without canceling and restarting
@@ -892,8 +889,7 @@ vdev_rebuild_thread(void *arg)
* participate in the rebuild.
*/
dsl_sync_task_nowait(dp, vdev_rebuild_reset_sync,
- (void *)(uintptr_t)vd->vdev_id, 0,
- ZFS_SPACE_CHECK_NONE, tx);
+ (void *)(uintptr_t)vd->vdev_id, tx);
} else {
/*
* The rebuild operation should be suspended. This may occur
diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c
index 56e420871f61..fdeca7ab3418 100644
--- a/module/zfs/vdev_removal.c
+++ b/module/zfs/vdev_removal.c
@@ -1167,8 +1167,8 @@ vdev_remove_replace_with_indirect(vdev_t *vd, uint64_t txg)
/* After this, we can not use svr. */
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
- dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_remove_complete_sync, svr,
- 0, ZFS_SPACE_CHECK_NONE, tx);
+ dsl_sync_task_nowait(spa->spa_dsl_pool,
+ vdev_remove_complete_sync, svr, tx);
dmu_tx_commit(tx);
}
@@ -1317,7 +1317,7 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca,
if (svr->svr_max_offset_to_sync[txg & TXG_MASK] == 0) {
dsl_sync_task_nowait(dmu_tx_pool(tx), vdev_mapping_sync,
- svr, 0, ZFS_SPACE_CHECK_NONE, tx);
+ svr, tx);
}
svr->svr_max_offset_to_sync[txg & TXG_MASK] = range_tree_max(segs);
@@ -2143,8 +2143,7 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
vdev_config_dirty(vd);
dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, *txg);
dsl_sync_task_nowait(spa->spa_dsl_pool,
- vdev_remove_initiate_sync,
- (void *)(uintptr_t)vd->vdev_id, 0, ZFS_SPACE_CHECK_NONE, tx);
+ vdev_remove_initiate_sync, (void *)(uintptr_t)vd->vdev_id, tx);
dmu_tx_commit(tx);
return (0);
diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c
index 3f8c34806020..02b42ddd5a6c 100644
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@@ -317,7 +317,7 @@ vdev_trim_change_state(vdev_t *vd, vdev_trim_state_t new_state,
dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
dsl_sync_task_nowait(spa_get_dsl(spa), vdev_trim_zap_update_sync,
- guid, 2, ZFS_SPACE_CHECK_NONE, tx);
+ guid, tx);
switch (new_state) {
case VDEV_TRIM_ACTIVE:
@@ -481,7 +481,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
if (ta->trim_type == TRIM_TYPE_MANUAL) {
while (vd->vdev_trim_rate != 0 && !vdev_trim_should_stop(vd) &&
vdev_trim_calculate_rate(ta) > vd->vdev_trim_rate) {
- cv_timedwait_sig(&vd->vdev_trim_io_cv,
+ cv_timedwait_idle(&vd->vdev_trim_io_cv,
&vd->vdev_trim_io_lock, ddi_get_lbolt() +
MSEC_TO_TICK(10));
}
@@ -510,8 +510,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
/* This is the first write of this txg. */
dsl_sync_task_nowait(spa_get_dsl(spa),
- vdev_trim_zap_update_sync, guid, 2,
- ZFS_SPACE_CHECK_RESERVED, tx);
+ vdev_trim_zap_update_sync, guid, tx);
}
/*
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index ad13ccedfc06..a8341f50ba09 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012,2020 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@@ -101,7 +101,251 @@
* good and bad versions of the buffer (if available), and we annotate the
* ereport with information about the differences.
*/
+
#ifdef _KERNEL
+/*
+ * Duplicate ereport Detection
+ *
+ * Some ereports are retained momentarily for detecting duplicates. These
+ * are kept in a recent_events_node_t in both a time-ordered list and an AVL
+ * tree of recent unique ereports.
+ *
+ * The lifespan of these recent ereports is bounded (15 mins) and a cleaner
+ * task is used to purge stale entries.
+ */
+static list_t recent_events_list;
+static avl_tree_t recent_events_tree;
+static kmutex_t recent_events_lock;
+static taskqid_t recent_events_cleaner_tqid;
+
+/*
+ * Each node is about 128 bytes so 2,000 would consume 1/4 MiB.
+ *
+ * This setting can be changed dynamically and setting it to zero
+ * disables duplicate detection.
+ */
+unsigned int zfs_zevent_retain_max = 2000;
+
+/*
+ * The lifespan for a recent ereport entry. The default of 15 minutes is
+ * intended to outlive the zfs diagnosis engine's threshold of 10 errors
+ * over a period of 10 minutes.
+ */
+unsigned int zfs_zevent_retain_expire_secs = 900;
+
+typedef enum zfs_subclass {
+ ZSC_IO,
+ ZSC_DATA,
+ ZSC_CHECKSUM
+} zfs_subclass_t;
+
+typedef struct {
+ /* common criteria */
+ uint64_t re_pool_guid;
+ uint64_t re_vdev_guid;
+ int re_io_error;
+ uint64_t re_io_size;
+ uint64_t re_io_offset;
+ zfs_subclass_t re_subclass;
+ zio_priority_t re_io_priority;
+
+ /* logical zio criteria (optional) */
+ zbookmark_phys_t re_io_bookmark;
+
+ /* internal state */
+ avl_node_t re_tree_link;
+ list_node_t re_list_link;
+ uint64_t re_timestamp;
+} recent_events_node_t;
+
+static int
+recent_events_compare(const void *a, const void *b)
+{
+ const recent_events_node_t *node1 = a;
+ const recent_events_node_t *node2 = b;
+ int cmp;
+
+ /*
+ * The comparison order here is somewhat arbitrary.
+ * What's important is that if every criterion matches, then it
+ * is a duplicate (i.e. compare returns 0)
+ */
+ if ((cmp = TREE_CMP(node1->re_subclass, node2->re_subclass)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(node1->re_pool_guid, node2->re_pool_guid)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(node1->re_vdev_guid, node2->re_vdev_guid)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(node1->re_io_error, node2->re_io_error)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(node1->re_io_priority, node2->re_io_priority)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(node1->re_io_size, node2->re_io_size)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(node1->re_io_offset, node2->re_io_offset)) != 0)
+ return (cmp);
+
+ const zbookmark_phys_t *zb1 = &node1->re_io_bookmark;
+ const zbookmark_phys_t *zb2 = &node2->re_io_bookmark;
+
+ if ((cmp = TREE_CMP(zb1->zb_objset, zb2->zb_objset)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(zb1->zb_object, zb2->zb_object)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(zb1->zb_level, zb2->zb_level)) != 0)
+ return (cmp);
+ if ((cmp = TREE_CMP(zb1->zb_blkid, zb2->zb_blkid)) != 0)
+ return (cmp);
+
+ return (0);
+}
+
+static void zfs_ereport_schedule_cleaner(void);
+
+/*
+ * background task to clean stale recent event nodes.
+ */
+/*ARGSUSED*/
+static void
+zfs_ereport_cleaner(void *arg)
+{
+ recent_events_node_t *entry;
+ uint64_t now = gethrtime();
+
+ /*
+ * purge expired entries
+ */
+ mutex_enter(&recent_events_lock);
+ while ((entry = list_tail(&recent_events_list)) != NULL) {
+ uint64_t age = NSEC2SEC(now - entry->re_timestamp);
+ if (age <= zfs_zevent_retain_expire_secs)
+ break;
+
+ /* remove expired node */
+ avl_remove(&recent_events_tree, entry);
+ list_remove(&recent_events_list, entry);
+ kmem_free(entry, sizeof (*entry));
+ }
+
+ /* Restart the cleaner if more entries remain */
+ recent_events_cleaner_tqid = 0;
+ if (!list_is_empty(&recent_events_list))
+ zfs_ereport_schedule_cleaner();
+
+ mutex_exit(&recent_events_lock);
+}
+
+static void
+zfs_ereport_schedule_cleaner(void)
+{
+ ASSERT(MUTEX_HELD(&recent_events_lock));
+
+ uint64_t timeout = SEC2NSEC(zfs_zevent_retain_expire_secs + 1);
+
+ recent_events_cleaner_tqid = taskq_dispatch_delay(
+ system_delay_taskq, zfs_ereport_cleaner, NULL, TQ_SLEEP,
+ ddi_get_lbolt() + NSEC_TO_TICK(timeout));
+}
+
+/*
+ * Check if an ereport would be a duplicate of one recently posted.
+ *
+ * An ereport is considered a duplicate if the set of criteria in
+ * recent_events_node_t all match.
+ *
+ * Only FM_EREPORT_ZFS_IO, FM_EREPORT_ZFS_DATA, and FM_EREPORT_ZFS_CHECKSUM
+ * are candidates for duplicate checking.
+ */
+static boolean_t
+zfs_ereport_is_duplicate(const char *subclass, spa_t *spa, vdev_t *vd,
+ const zbookmark_phys_t *zb, zio_t *zio, uint64_t offset, uint64_t size)
+{
+ recent_events_node_t search = {0}, *entry;
+
+ if (vd == NULL || zio == NULL)
+ return (B_FALSE);
+
+ if (zfs_zevent_retain_max == 0)
+ return (B_FALSE);
+
+ if (strcmp(subclass, FM_EREPORT_ZFS_IO) == 0)
+ search.re_subclass = ZSC_IO;
+ else if (strcmp(subclass, FM_EREPORT_ZFS_DATA) == 0)
+ search.re_subclass = ZSC_DATA;
+ else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0)
+ search.re_subclass = ZSC_CHECKSUM;
+ else
+ return (B_FALSE);
+
+ search.re_pool_guid = spa_guid(spa);
+ search.re_vdev_guid = vd->vdev_guid;
+ search.re_io_error = zio->io_error;
+ search.re_io_priority = zio->io_priority;
+ /* if size is supplied use it over what's in zio */
+ if (size) {
+ search.re_io_size = size;
+ search.re_io_offset = offset;
+ } else {
+ search.re_io_size = zio->io_size;
+ search.re_io_offset = zio->io_offset;
+ }
+
+ /* grab optional logical zio criteria */
+ if (zb != NULL) {
+ search.re_io_bookmark.zb_objset = zb->zb_objset;
+ search.re_io_bookmark.zb_object = zb->zb_object;
+ search.re_io_bookmark.zb_level = zb->zb_level;
+ search.re_io_bookmark.zb_blkid = zb->zb_blkid;
+ }
+
+ uint64_t now = gethrtime();
+
+ mutex_enter(&recent_events_lock);
+
+ /* check if we have seen this one recently */
+ entry = avl_find(&recent_events_tree, &search, NULL);
+ if (entry != NULL) {
+ uint64_t age = NSEC2SEC(now - entry->re_timestamp);
+
+ /*
+ * There is still an active cleaner (since we're here).
+ * Reset the last seen time for this duplicate entry
+ * so that its lifespan gets extended.
+ */
+ list_remove(&recent_events_list, entry);
+ list_insert_head(&recent_events_list, entry);
+ entry->re_timestamp = now;
+
+ zfs_zevent_track_duplicate();
+ mutex_exit(&recent_events_lock);
+
+ return (age <= zfs_zevent_retain_expire_secs);
+ }
+
+ if (avl_numnodes(&recent_events_tree) >= zfs_zevent_retain_max) {
+ /* recycle oldest node */
+ entry = list_tail(&recent_events_list);
+ ASSERT(entry != NULL);
+ list_remove(&recent_events_list, entry);
+ avl_remove(&recent_events_tree, entry);
+ } else {
+ entry = kmem_alloc(sizeof (recent_events_node_t), KM_SLEEP);
+ }
+
+ /* record this as a recent ereport */
+ *entry = search;
+ avl_add(&recent_events_tree, entry);
+ list_insert_head(&recent_events_list, entry);
+ entry->re_timestamp = now;
+
+ /* Start a cleaner if not already scheduled */
+ if (recent_events_cleaner_tqid == 0)
+ zfs_ereport_schedule_cleaner();
+
+ mutex_exit(&recent_events_lock);
+ return (B_FALSE);
+}
+
void
zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
{
@@ -153,9 +397,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
uint64_t ena;
char class[64];
- if (!zfs_ereport_is_valid(subclass, spa, vd, zio))
- return (B_FALSE);
-
if ((ereport = fm_nvlist_create(NULL)) == NULL)
return (B_FALSE);
@@ -336,6 +577,8 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
DATA_TYPE_UINT64, zio->io_timestamp, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA,
DATA_TYPE_UINT64, zio->io_delta, NULL);
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY,
+ DATA_TYPE_UINT32, zio->io_priority, NULL);
/*
* If the 'size' parameter is non-zero, it indicates this is a
@@ -788,24 +1031,34 @@ zfs_ereport_is_valid(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio)
}
/*
- * Return 0 if event was posted, EINVAL if there was a problem posting it or
- * EBUSY if the event was rate limited.
+ * Post an ereport for the given subclass
+ *
+ * Returns
+ * - 0 if an event was posted
+ * - EINVAL if there was a problem posting event
+ * - EBUSY if the event was rate limited
+ * - EALREADY if the event was already posted (duplicate)
*/
int
zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
- const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
- uint64_t size)
+ const zbookmark_phys_t *zb, zio_t *zio, uint64_t state)
{
int rc = 0;
#ifdef _KERNEL
nvlist_t *ereport = NULL;
nvlist_t *detector = NULL;
+ if (!zfs_ereport_is_valid(subclass, spa, vd, zio))
+ return (EINVAL);
+
+ if (zfs_ereport_is_duplicate(subclass, spa, vd, zb, zio, 0, 0))
+ return (SET_ERROR(EALREADY));
+
if (zfs_is_ratelimiting_event(subclass, vd))
return (SET_ERROR(EBUSY));
if (!zfs_ereport_start(&ereport, &detector, subclass, spa, vd,
- zb, zio, stateoroffset, size))
+ zb, zio, state, 0))
return (SET_ERROR(EINVAL)); /* couldn't post event */
if (ereport == NULL)
@@ -817,7 +1070,16 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
return (rc);
}
-void
+/*
+ * Prepare a checksum ereport
+ *
+ * Returns
+ * - 0 if an event was posted
+ * - EINVAL if there was a problem posting event
+ * - EBUSY if the event was rate limited
+ * - EALREADY if the event was already posted (duplicate)
+ */
+int
zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length, void *arg,
zio_bad_cksum_t *info)
@@ -825,8 +1087,15 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
zio_cksum_report_t *report;
#ifdef _KERNEL
+ if (!zfs_ereport_is_valid(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio))
+ return (SET_ERROR(EINVAL));
+
+ if (zfs_ereport_is_duplicate(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio,
+ offset, length))
+ return (SET_ERROR(EALREADY));
+
if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
- return;
+ return (SET_ERROR(EBUSY));
#endif
report = kmem_zalloc(sizeof (*report), KM_SLEEP);
@@ -851,7 +1120,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
if (report->zcr_ereport == NULL) {
zfs_ereport_free_checksum(report);
- return;
+ return (0);
}
#endif
@@ -859,6 +1128,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
report->zcr_next = zio->io_logical->io_cksum_report;
zio->io_logical->io_cksum_report = report;
mutex_exit(&spa->spa_errlist_lock);
+ return (0);
}
void
@@ -901,7 +1171,15 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
kmem_free(rpt, sizeof (*rpt));
}
-
+/*
+ * Post a checksum ereport
+ *
+ * Returns
+ * - 0 if an event was posted
+ * - EINVAL if there was a problem posting event
+ * - EBUSY if the event was rate limited
+ * - EALREADY if the event was already posted (duplicate)
+ */
int
zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length,
@@ -913,8 +1191,15 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
nvlist_t *detector = NULL;
zfs_ecksum_info_t *info;
+ if (!zfs_ereport_is_valid(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio))
+ return (SET_ERROR(EINVAL));
+
+ if (zfs_ereport_is_duplicate(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio,
+ offset, length))
+ return (SET_ERROR(EALREADY));
+
if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
- return (EBUSY);
+ return (SET_ERROR(EBUSY));
if (!zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM,
spa, vd, zb, zio, offset, length) || (ereport == NULL)) {
@@ -1073,11 +1358,57 @@ zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate)
#endif
}
-#if defined(_KERNEL)
+#ifdef _KERNEL
+void
+zfs_ereport_init(void)
+{
+ mutex_init(&recent_events_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&recent_events_list, sizeof (recent_events_node_t),
+ offsetof(recent_events_node_t, re_list_link));
+ avl_create(&recent_events_tree, recent_events_compare,
+ sizeof (recent_events_node_t), offsetof(recent_events_node_t,
+ re_tree_link));
+}
+
+/*
+ * This 'early' fini needs to run before zfs_fini() which on Linux waits
+ * for the system_delay_taskq to drain.
+ */
+void
+zfs_ereport_taskq_fini(void)
+{
+ mutex_enter(&recent_events_lock);
+ if (recent_events_cleaner_tqid != 0) {
+ taskq_cancel_id(system_delay_taskq, recent_events_cleaner_tqid);
+ recent_events_cleaner_tqid = 0;
+ }
+ mutex_exit(&recent_events_lock);
+}
+
+void
+zfs_ereport_fini(void)
+{
+ recent_events_node_t *entry;
+
+ while ((entry = list_head(&recent_events_list)) != NULL) {
+ avl_remove(&recent_events_tree, entry);
+ list_remove(&recent_events_list, entry);
+ kmem_free(entry, sizeof (*entry));
+ }
+ avl_destroy(&recent_events_tree);
+ list_destroy(&recent_events_list);
+ mutex_destroy(&recent_events_lock);
+}
+
EXPORT_SYMBOL(zfs_ereport_post);
EXPORT_SYMBOL(zfs_ereport_is_valid);
EXPORT_SYMBOL(zfs_ereport_post_checksum);
EXPORT_SYMBOL(zfs_post_remove);
EXPORT_SYMBOL(zfs_post_autoreplace);
EXPORT_SYMBOL(zfs_post_state_change);
+
+ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, retain_max, UINT, ZMOD_RW,
+ "Maximum recent zevents records to retain for duplicate checking");
+ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, retain_expire_secs, UINT, ZMOD_RW,
+ "Expiration time for recent zevents records");
#endif /* _KERNEL */
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 495ff4707d77..eff66b32fcb1 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -3511,30 +3511,29 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
/*
* This ioctl is used to set the bootenv configuration on the current
* pool. This configuration is stored in the second padding area of the label,
- * and it is used by the GRUB bootloader used on Linux to store the contents
- * of the grubenv file. The file is stored as raw ASCII, and is protected by
- * an embedded checksum. By default, GRUB will check if the boot filesystem
- * supports storing the environment data in a special location, and if so,
- * will invoke filesystem specific logic to retrieve it. This can be overridden
- * by a variable, should the user so desire.
+ * and it is used by the bootloader(s) to store the bootloader and/or system
+ * specific data.
+ * The data is stored as an nvlist data stream, and is protected by
+ * an embedded checksum.
+ * The version can have two possible values:
+ * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
+ * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
*/
-/* ARGSUSED */
static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
- {"envmap", DATA_TYPE_STRING, 0},
+ {"version", DATA_TYPE_UINT64, 0},
+ {"<keys>", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
};
static int
zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
- char *envmap;
int error;
spa_t *spa;
- envmap = fnvlist_lookup_string(innvl, "envmap");
if ((error = spa_open(name, &spa, FTAG)) != 0)
return (error);
spa_vdev_state_enter(spa, SCL_ALL);
- error = vdev_label_write_bootenv(spa->spa_root_vdev, envmap);
+ error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
(void) spa_vdev_state_exit(spa, NULL, 0);
spa_close(spa, FTAG);
return (error);
@@ -3544,7 +3543,6 @@ static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
/* no nvl keys */
};
-/* ARGSUSED */
static int
zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
@@ -7615,6 +7613,7 @@ zfs_kmod_fini(void)
kmem_free(zs, sizeof (zfsdev_state_t));
}
+ zfs_ereport_taskq_fini(); /* run before zfs_fini() on Linux */
zfs_fini();
spa_fini();
zvol_fini();
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index f956a9ef7621..8a8fbccd7d63 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Klara Inc.
@@ -547,7 +547,7 @@ error:
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, &zio->io_bookmark);
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
- spa, NULL, &zio->io_bookmark, zio, 0, 0);
+ spa, NULL, &zio->io_bookmark, zio, 0);
}
} else {
zio->io_error = ret;
@@ -2004,7 +2004,7 @@ zio_deadman_impl(zio_t *pio, int ziodepth)
zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid,
pio->io_offset, pio->io_size, pio->io_error);
(void) zfs_ereport_post(FM_EREPORT_ZFS_DEADMAN,
- pio->io_spa, vd, zb, pio, 0, 0);
+ pio->io_spa, vd, zb, pio, 0);
if (failmode == ZIO_FAILURE_MODE_CONTINUE &&
taskq_empty_ent(&pio->io_tqent)) {
@@ -2331,7 +2331,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
"failure and has been suspended.\n", spa_name(spa));
(void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
- NULL, NULL, 0, 0);
+ NULL, NULL, 0);
mutex_enter(&spa->spa_suspend_lock);
@@ -4217,13 +4217,15 @@ zio_checksum_verify(zio_t *zio)
zio->io_error = error;
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- mutex_enter(&zio->io_vd->vdev_stat_lock);
- zio->io_vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&zio->io_vd->vdev_stat_lock);
-
- zfs_ereport_start_checksum(zio->io_spa,
+ int ret = zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio,
zio->io_offset, zio->io_size, NULL, &info);
+
+ if (ret != EALREADY) {
+ mutex_enter(&zio->io_vd->vdev_stat_lock);
+ zio->io_vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&zio->io_vd->vdev_stat_lock);
+ }
}
}
@@ -4543,7 +4545,7 @@ zio_done(zio_t *zio)
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd, &zio->io_bookmark,
- zio, 0, 0);
+ zio, 0);
}
}
}
@@ -4557,16 +4559,16 @@ zio_done(zio_t *zio)
*/
if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
!vdev_is_dead(zio->io_vd)) {
- mutex_enter(&zio->io_vd->vdev_stat_lock);
- if (zio->io_type == ZIO_TYPE_READ) {
- zio->io_vd->vdev_stat.vs_read_errors++;
- } else if (zio->io_type == ZIO_TYPE_WRITE) {
- zio->io_vd->vdev_stat.vs_write_errors++;
+ int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO,
+ zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
+ if (ret != EALREADY) {
+ mutex_enter(&zio->io_vd->vdev_stat_lock);
+ if (zio->io_type == ZIO_TYPE_READ)
+ zio->io_vd->vdev_stat.vs_read_errors++;
+ else if (zio->io_type == ZIO_TYPE_WRITE)
+ zio->io_vd->vdev_stat.vs_write_errors++;
+ mutex_exit(&zio->io_vd->vdev_stat_lock);
}
- mutex_exit(&zio->io_vd->vdev_stat_lock);
-
- (void) zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
- zio->io_vd, &zio->io_bookmark, zio, 0, 0);
}
if ((zio->io_error == EIO || !(zio->io_flags &
@@ -4578,7 +4580,7 @@ zio_done(zio_t *zio)
*/
spa_log_error(zio->io_spa, &zio->io_bookmark);
(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
- zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0);
+ zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
}
}
diff --git a/module/zfs/zthr.c b/module/zfs/zthr.c
index fdc4b863382c..5ac2e30467e3 100644
--- a/module/zfs/zthr.c
+++ b/module/zfs/zthr.c
@@ -56,7 +56,7 @@
*
* == ZTHR creation
*
- * Every zthr needs three inputs to start running:
+ * Every zthr needs four inputs to start running:
*
* 1] A user-defined checker function (checkfunc) that decides whether
* the zthr should start working or go to sleep. The function should
@@ -72,6 +72,9 @@
* 3] A void args pointer that will be passed to checkfunc and func
* implicitly by the infrastructure.
*
+ * 4] A name for the thread. This string must be valid for the lifetime
+ * of the zthr.
+ *
* The reason why the above API needs two different functions,
* instead of one that both checks and does the work, has to do with
* the zthr's internal state lock (zthr_state_lock) and the allowed
@@ -221,6 +224,7 @@ struct zthr {
zthr_checkfunc_t *zthr_checkfunc;
zthr_func_t *zthr_func;
void *zthr_arg;
+ const char *zthr_name;
};
static void
@@ -237,15 +241,10 @@ zthr_procedure(void *arg)
t->zthr_func(t->zthr_arg, t);
mutex_enter(&t->zthr_state_lock);
} else {
- /*
- * cv_wait_sig() is used instead of cv_wait() in
- * order to prevent this process from incorrectly
- * contributing to the system load average when idle.
- */
if (t->zthr_sleep_timeout == 0) {
- cv_wait_sig(&t->zthr_cv, &t->zthr_state_lock);
+ cv_wait_idle(&t->zthr_cv, &t->zthr_state_lock);
} else {
- (void) cv_timedwait_sig_hires(&t->zthr_cv,
+ (void) cv_timedwait_idle_hires(&t->zthr_cv,
&t->zthr_state_lock, t->zthr_sleep_timeout,
MSEC2NSEC(1), 0);
}
@@ -296,6 +295,7 @@ zthr_create_timer(const char *zthr_name, zthr_checkfunc_t *checkfunc,
t->zthr_func = func;
t->zthr_arg = arg;
t->zthr_sleep_timeout = max_sleep;
+ t->zthr_name = zthr_name;
t->zthr_thread = thread_create_named(zthr_name, NULL, 0,
zthr_procedure, t, 0, &p0, TS_RUN, minclsyspri);
@@ -422,8 +422,8 @@ zthr_resume(zthr_t *t)
* no-op.
*/
if (t->zthr_thread == NULL) {
- t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
- 0, &p0, TS_RUN, minclsyspri);
+ t->zthr_thread = thread_create_named(t->zthr_name, NULL, 0,
+ zthr_procedure, t, 0, &p0, TS_RUN, minclsyspri);
}
mutex_exit(&t->zthr_state_lock);
diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
index e715c8569a9e..c410620a8f9b 100644
--- a/rpm/generic/zfs.spec.in
+++ b/rpm/generic/zfs.spec.in
@@ -491,6 +491,7 @@ systemctl --system daemon-reload >/dev/null || true
%files -n libzfs2-devel
%{_pkgconfigdir}/libzfs.pc
+%{_pkgconfigdir}/libzfsbootenv.pc
%{_pkgconfigdir}/libzfs_core.pc
%{_libdir}/*.so
%{_includedir}/*
diff --git a/scripts/zfs-tests.sh b/scripts/zfs-tests.sh
index ae927691139f..ea6dc7eb271e 100755
--- a/scripts/zfs-tests.sh
+++ b/scripts/zfs-tests.sh
@@ -308,7 +308,7 @@ constrain_path() {
usage() {
cat << EOF
USAGE:
-$0 [hvqxkfS] [-s SIZE] [-r RUNFILES] [-t PATH] [-u USER]
+$0 [-hvqxkfS] [-s SIZE] [-r RUNFILES] [-t PATH] [-u USER]
DESCRIPTION:
ZFS Test Suite launch script
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index fcd9684603b4..725afe2f054a 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -346,7 +346,7 @@ tags = ['functional', 'cli_root', 'zpool_detach']
[tests/functional/cli_root/zpool_events]
tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow',
- 'zpool_events_poolname', 'zpool_events_errors']
+ 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates']
tags = ['functional', 'cli_root', 'zpool_events']
[tests/functional/cli_root/zpool_export]
diff --git a/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am b/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am
index ba02f93fe2ab..cd462208957c 100644
--- a/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am
+++ b/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am
@@ -4,6 +4,13 @@ pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
pkgexec_PROGRAMS = libzfs_input_check
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+endif
+if BUILD_LINUX
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/linux/zfs
+endif
+
libzfs_input_check_SOURCES = libzfs_input_check.c
libzfs_input_check_LDADD = \
$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
index 9fee37357fc3..63217104f3fe 100644
--- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
+++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
@@ -25,7 +25,9 @@
#include <libzutil.h>
#include <sys/nvpair.h>
+#include <sys/vdev_impl.h>
#include <sys/zfs_ioctl.h>
+#include <sys/zfs_bootenv.h>
/*
* Test the nvpair inputs for the non-legacy zfs ioctl commands.
@@ -762,9 +764,10 @@ test_set_bootenv(const char *pool)
{
nvlist_t *required = fnvlist_alloc();
- fnvlist_add_string(required, "envmap", "test");
+ fnvlist_add_uint64(required, "version", VB_RAW);
+ fnvlist_add_string(required, GRUB_ENVMAP, "test");
- IOC_INPUT_TEST(ZFS_IOC_SET_BOOTENV, pool, required, NULL, 0);
+ IOC_INPUT_TEST_WILD(ZFS_IOC_SET_BOOTENV, pool, required, NULL, 0);
nvlist_free(required);
}
diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
index bf8b67e750fb..4c11bf146378 100644
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@@ -105,6 +105,7 @@ export SYSTEM_FILES_COMMON='arp
umask
umount
uname
+ uniq
uuidgen
vmstat
wait
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index ad2811395332..fab852a0a607 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -76,12 +76,14 @@ TRIM_TXG_BATCH trim.txg_batch zfs_trim_txg_batch
TXG_HISTORY txg.history zfs_txg_history
TXG_TIMEOUT txg.timeout zfs_txg_timeout
UNLINK_SUSPEND_PROGRESS UNSUPPORTED zfs_unlink_suspend_progress
+VDEV_FILE_PHYSICAL_ASHIFT vdev.file.physical_ashift vdev_file_physical_ashift
VDEV_MIN_MS_COUNT vdev.min_ms_count zfs_vdev_min_ms_count
VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
+ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
ZIO_SLOW_IO_MS zio.slow_io_ms zio_slow_io_ms
%%%%
while read name FreeBSD Linux; do
diff --git a/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh b/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh
index 66124fe9cc31..d62bf9c346b6 100755
--- a/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh
@@ -34,7 +34,7 @@
#
# DESCRIPTION:
-# Verify that user can access file/directory if acltype=posixacl.
+# Verify that user can access file/directory if acltype=posix.
#
# STRATEGY:
# 1. Test access to file (mode=rw-)
@@ -50,7 +50,7 @@ function cleanup
rmdir $TESTDIR/dir.0
}
-log_assert "Verify acltype=posixacl works on file"
+log_assert "Verify acltype=posix works on file"
log_onexit cleanup
# Test access to FILE
diff --git a/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh b/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh
index 1aceffd15692..d9b5036458f8 100755
--- a/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh
@@ -34,7 +34,7 @@
#
# DESCRIPTION:
-# Verify that user can access file/directory if acltype=posixacl.
+# Verify that user can access file/directory if acltype=posix.
#
# STRATEGY:
# 1. Test access to directory (mode=-wx)
@@ -43,7 +43,7 @@
#
verify_runnable "both"
-log_assert "Verify acltype=posixacl works on directory"
+log_assert "Verify acltype=posix works on directory"
# Test access to DIRECTORY
log_note "Testing access to DIRECTORY"
diff --git a/tests/zfs-tests/tests/functional/acl/posix/setup.ksh b/tests/zfs-tests/tests/functional/acl/posix/setup.ksh
index 5d6d15864134..d8bf8a638e7b 100755
--- a/tests/zfs-tests/tests/functional/acl/posix/setup.ksh
+++ b/tests/zfs-tests/tests/functional/acl/posix/setup.ksh
@@ -46,7 +46,7 @@ default_setup_noexit $DISK
log_must chmod 777 $TESTDIR
# Use POSIX ACLs on filesystem
-log_must zfs set acltype=posixacl $TESTPOOL/$TESTFS
+log_must zfs set acltype=posix $TESTPOOL/$TESTFS
log_must zfs set xattr=sa $TESTPOOL/$TESTFS
log_pass
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
index d9ae88e0792e..89cc4b0d3082 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
@@ -22,6 +22,7 @@
#
# Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -35,13 +36,15 @@
# STRATEGY:
# 1. Create a pool with default values.
# 2. Verify 'zpool add -o ashift=<n>' works with allowed values (9-16).
-# 3. Verify 'zpool add -o ashift=<n>' doesn't accept other invalid values.
+# 3. Verify setting kernel tunable for file vdevs works correctly.
+# 4. Verify 'zpool add -o ashift=<n>' doesn't accept other invalid values.
#
verify_runnable "global"
function cleanup
{
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
poolexists $TESTPOOL && destroy_pool $TESTPOOL
rm -f $disk1 $disk2
}
@@ -54,6 +57,8 @@ disk2=$TEST_BASE_DIR/disk2
log_must mkfile $SIZE $disk1
log_must mkfile $SIZE $disk2
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]}
do
@@ -69,6 +74,24 @@ do
log_must zpool destroy $TESTPOOL
log_must zpool labelclear $disk1
log_must zpool labelclear $disk2
+
+ #
+ # Make sure we can also set the ashift using the tunable.
+ #
+ log_must zpool create $TESTPOOL $disk1
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $ashift
+ log_must zpool add $TESTPOOL $disk2
+ verify_ashift $disk2 $ashift
+ if [[ $? -ne 0 ]]
+ then
+ log_fail "Device was added without setting ashift value to "\
+ "$ashift"
+ fi
+ # clean things for the next run
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
+ log_must zpool destroy $TESTPOOL
+ log_must zpool labelclear $disk1
+ log_must zpool labelclear $disk2
done
typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh
index 37887f3d115e..4637fe0d84a3 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh
@@ -22,6 +22,7 @@
#
# Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -43,6 +44,7 @@ verify_runnable "global"
function cleanup
{
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
poolexists $TESTPOOL && destroy_pool $TESTPOOL
log_must rm -f $disk1 $disk2
}
@@ -55,6 +57,14 @@ disk2=$TEST_BASE_DIR/disk2
log_must mkfile $SIZE $disk1
log_must mkfile $SIZE $disk2
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]}
do
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh
index 9af011cb04d8..618c6992edb4 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh
@@ -22,6 +22,7 @@
#
# Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -41,6 +42,7 @@ verify_runnable "global"
function cleanup
{
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
rm -f $disk1 $disk2
}
@@ -53,6 +55,14 @@ disk2=$TEST_BASE_DIR/disk2
log_must truncate -s $SIZE $disk1
log_must truncate -s $SIZE $disk2
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]}
do
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh
index 6ea1573241f0..655f887b60ad 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh
@@ -68,7 +68,7 @@ set -A RW_FS_PROP "quota=536870912" \
"setuid=off" \
"readonly=on" \
"snapdir=visible" \
- "acltype=posixacl" \
+ "acltype=posix" \
"aclinherit=discard" \
"canmount=off"
if is_freebsd; then
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh
index 349f73fecca5..4a918c0a683a 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh
@@ -68,7 +68,7 @@ set -A RW_FS_PROP "quota=536870912" \
"setuid=off" \
"readonly=on" \
"snapdir=visible" \
- "acltype=posixacl" \
+ "acltype=posix" \
"aclinherit=discard" \
"canmount=off"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore b/tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore
new file mode 100644
index 000000000000..a1f8c14838fa
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore
@@ -0,0 +1 @@
+/ereports
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
index 7fb6e4f7a5c2..99c46f0143c2 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
@@ -1,4 +1,8 @@
+include $(top_srcdir)/config/Rules.am
+
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_events
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_events
+
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
@@ -6,8 +10,16 @@ dist_pkgdata_SCRIPTS = \
zpool_events_cliargs.ksh \
zpool_events_follow.ksh \
zpool_events_poolname.ksh \
- zpool_events_errors.ksh
+ zpool_events_errors.ksh \
+ zpool_events_duplicates.ksh
dist_pkgdata_DATA = \
zpool_events.cfg \
zpool_events.kshlib
+
+ereports_LDADD = \
+ $(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+ $(abs_top_builddir)/lib/libzfs/libzfs.la
+
+pkgexec_PROGRAMS = ereports
+ereports_SOURCES = ereports.c
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c b/tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c
new file mode 100644
index 000000000000..f825240000f7
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c
@@ -0,0 +1,174 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <libzfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/nvpair.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/fs/zfs.h>
+
+/*
+ * Command to output io and checksum ereport values, one per line.
+ * Used by zpool_events_duplicates.ksh to check for duplicate events.
+ *
+ * example output line:
+ *
+ * checksum "error_pool" 0x856dd01ce52e336 0x000034 0x000400 0x000a402c00
+ * 0x000004 0x000000 0x000000 0x000000 0x000001
+ */
+
+/*
+ * Our ereport duplicate criteria
+ *
+ * When the class and all of these values match, then an ereport is
+ * considered to be a duplicate.
+ */
+static const char *criteria_name[] = {
+ FM_EREPORT_PAYLOAD_ZFS_POOL,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY,
+
+ /* logical zio criteria (optional) */
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
+};
+
+#define CRITERIA_NAMES_COUNT ARRAY_SIZE(criteria_name)
+
+/*
+ * Print one line for an ereport: its subclass followed by the value of
+ * each duplicate criterion, tab-separated.  Events whose class is not
+ * ereport.fs.zfs.*, and the 'data' subclass, produce no output.
+ */
+static void
+print_ereport_line(nvlist_t *nvl)
+{
+	char *class;
+	int last = CRITERIA_NAMES_COUNT - 1;
+
+	/*
+	 * Only ereports are of interest ('data' is skipped to minimize the
+	 * output). The prefix must lead the class: it is stripped below.
+	 */
+	if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0 ||
+	    strncmp(class, "ereport.fs.zfs.", strlen("ereport.fs.zfs.")) != 0 ||
+	    strcmp(class, "ereport.fs.zfs.data") == 0) {
+		return;
+	}
+
+	(void) printf("%s\t", class + strlen("ereport.fs.zfs."));
+
+	for (int i = 0; i < CRITERIA_NAMES_COUNT; i++) {
+		nvpair_t *nvp;
+		uint32_t i32 = 0;
+		uint64_t i64 = 0;
+		char *str = NULL;
+
+		if (nvlist_lookup_nvpair(nvl, criteria_name[i], &nvp) != 0) {
+			/* print a proxy for optional criteria */
+			(void) printf("--------%c", i == last ? '\n' : '\t');
+			continue;
+		}
+
+		switch (nvpair_type(nvp)) {
+		case DATA_TYPE_STRING:
+			(void) nvpair_value_string(nvp, &str);
+			(void) printf("\"%s\"", str ? str : "<NULL>");
+			break;
+		case DATA_TYPE_INT32:
+			(void) nvpair_value_int32(nvp, (void *)&i32);
+			(void) printf("0x%06x", i32);
+			break;
+		case DATA_TYPE_UINT32:
+			(void) nvpair_value_uint32(nvp, &i32);
+			(void) printf("0x%06x", i32);
+			break;
+		case DATA_TYPE_INT64:
+			(void) nvpair_value_int64(nvp, (void *)&i64);
+			(void) printf("0x%06llx", (u_longlong_t)i64);
+			break;
+		case DATA_TYPE_UINT64:
+			(void) nvpair_value_uint64(nvp, &i64);
+			if (strcmp(FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
+			    criteria_name[i]) == 0)
+				(void) printf("0x%010llx", (u_longlong_t)i64);
+			else
+				(void) printf("0x%06llx", (u_longlong_t)i64);
+			break;
+		default:
+			(void) printf("<unknown>");
+			break;
+		}
+		(void) printf("%c", i == last ? '\n' : '\t');
+	}
+}
+
+/*
+ * Read zevents with ZEVENT_NONBLOCK until a read fails or yields nothing.
+ */
+static void
+ereports_dump(libzfs_handle_t *zhdl, int zevent_fd)
+{
+	nvlist_t *event;
+	int lost;
+
+	while (zpool_events_next(zhdl, &event, &lost, ZEVENT_NONBLOCK,
+	    zevent_fd) == 0 && event != NULL) {
+		if (lost > 0)
+			(void) printf("dropped %d events\n", lost);
+		print_ereport_line(event);
+		(void) fflush(stdout);
+		nvlist_free(event);
+	}
+}
+
+/* ARGSUSED */
+int
+main(int argc, char **argv)
+{
+	libzfs_handle_t *zhdl = libzfs_init();
+	int zevent_fd;
+
+	/* Setup failures (no libzfs handle, cannot open ZFS_DEV) exit with 2. */
+	if (zhdl == NULL) {
+		(void) fprintf(stderr, "libzfs_init: %s\n", strerror(errno));
+		exit(2);
+	}
+	zevent_fd = open(ZFS_DEV, O_RDWR);
+	if (zevent_fd < 0) {
+		(void) fprintf(stderr, "open: %s\n", strerror(errno));
+		libzfs_fini(zhdl);
+		exit(2);
+	}
+
+	ereports_dump(zhdl, zevent_fd);
+
+	(void) close(zevent_fd);
+	libzfs_fini(zhdl);
+
+	return (0);
+}
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
new file mode 100644
index 000000000000..1ba7b1b34496
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
@@ -0,0 +1,155 @@
+#!/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION:
+# Verify that duplicate I/O ereport errors are not posted
+#
+# STRATEGY:
+# 1. Create a mirror pool
+# 2. Inject duplicate read/write IO errors and checksum errors
+# 3. Verify there are no duplicate events being posted
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+MOUNTDIR=$TEST_BASE_DIR/mount
+FILEPATH=$MOUNTDIR/badfile
+VDEV1=$TEST_BASE_DIR/vfile1
+VDEV2=$TEST_BASE_DIR/vfile2
+POOL=error_pool
+FILESIZE="10M"
+OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
+RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
+
+EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports"
+
+duplicates=false
+
+function cleanup
+{
+	# Undo everything the test set up: tunable, injections, pool, files.
+	log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
+	log_must zinject -c all
+	if poolexists $POOL ; then
+		destroy_pool $POOL
+	fi
+	log_must rm -fd $VDEV1 $VDEV2 $MOUNTDIR
+}
+
+log_assert "Duplicate I/O ereport errors are not posted"
+log_note "zevent retain max setting: $RETAIN_MAX"
+
+log_onexit cleanup
+
+# Set our threshold high to avoid dropping events; fail early if we can't.
+log_must set_tunable64 ZEVENT_LEN_MAX 20000
+
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2
+log_must mkdir -p $MOUNTDIR
+
+#
+# $1: kind of error to inject - corrupt (checksum error) or io
+# $2: I/O direction to inject it on - read or write
+function do_dup_test
+{
+	ERR=$1
+	RW=$2
+
+	log_note "Testing $ERR $RW ereports"
+	log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL mirror $VDEV1 $VDEV2
+	log_must zpool events -c
+	log_must zfs set compression=off $POOL
+
+	if [[ $RW == "read" ]]; then
+		log_must mkfile $FILESIZE $FILEPATH
+
+		# remount the filesystem so the file is no longer in the ARC
+		# and reads have to go through the error inject handler
+		log_must zfs unmount $POOL
+		log_must zfs mount $POOL
+
+		# every read of this file now gets an error
+		if [[ $ERR != "corrupt" ]]; then
+			log_must zinject -a -t data -e io -T read $FILEPATH
+		else
+			log_must zinject -a -t data -e checksum -T read $FILEPATH
+		fi
+
+		# Re-read the file several times so the same blocks
+		# produce the same errors over and over
+		# shellcheck disable=SC2034
+		for i in {1..15}; do
+			dd if=$FILEPATH of=/dev/null bs=128K > /dev/null 2>&1
+		done
+		log_must zinject -c all
+	fi
+
+	log_must zinject -d $VDEV1 -e $ERR -T $RW -f 100 $POOL
+
+	if [[ $RW != "write" ]]; then
+		# scrub twice to generate some duplicates
+		for pass in 1 2; do
+			log_must zpool scrub $POOL
+			log_must zpool wait -t scrub $POOL
+		done
+	else
+		log_must mkfile $FILESIZE $FILEPATH
+		log_must zpool sync $POOL
+	fi
+
+	log_must zinject -c all
+
+	# Let the pool settle and finish any resilver first: the error
+	# counts should have stopped changing by the time we look for
+	# duplicates.
+	zpool wait -t resilver $POOL
+
+	ereports="$($EREPORTS | sort)"
+	actual=$(echo "$ereports" | wc -l)
+	unique=$(echo "$ereports" | uniq | wc -l)
+	log_note "$actual total $ERR $RW ereports where $unique were unique"
+
+	if [[ $actual -gt $unique ]]; then
+		log_note "UNEXPECTED -- $((actual-unique)) duplicate $ERR $RW ereports"
+		echo "$ereports"
+		duplicates=true
+	fi
+
+	log_must zpool destroy $POOL
+}
+
+do_dup_test "corrupt" "read"
+do_dup_test "io" "read"
+do_dup_test "io" "write"
+
+if $duplicates; then
+ log_fail "FAILED -- Duplicate I/O ereport errors encountered"
+else
+ log_pass "Duplicate I/O ereport errors are not posted"
+fi
+
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh
index 0fc2c69ba143..1b18b1297a78 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh
@@ -22,6 +22,7 @@
#
# Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -41,6 +42,7 @@ verify_runnable "global"
function cleanup
{
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
rm -f $disk1 $disk2
}
@@ -53,6 +55,14 @@ disk2=$TEST_BASE_DIR/disk2
log_must truncate -s $SIZE $disk1
log_must truncate -s $SIZE $disk2
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]}
do
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh
index fbdd44ceb581..f076f26818eb 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh
@@ -22,6 +22,7 @@
#
# Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -43,6 +44,7 @@ verify_runnable "global"
function cleanup
{
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
rm -f $disk1 $disk2
}
@@ -55,6 +57,14 @@ disk2=$TEST_BASE_DIR/disk2
log_must truncate -s $SIZE $disk1
log_must truncate -s $SIZE $disk2
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]}
do
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh
index 86e692fadafc..09b5f50d5e18 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh
@@ -22,6 +22,7 @@
#
# Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -41,6 +42,7 @@ verify_runnable "global"
function cleanup
{
+ log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
destroy_pool $TESTPOOL1
rm -f $disk
}
@@ -52,6 +54,14 @@ log_onexit cleanup
log_assert "zpool set can modify 'ashift' property"
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
disk=$TEST_BASE_DIR/disk
log_must mkfile $SIZE $disk
log_must zpool create $TESTPOOL1 $disk
diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg b/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
index af867ded5374..cd0cf771e1fb 100644
--- a/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
+++ b/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
@@ -41,7 +41,7 @@ if is_linux; then
# zfs get/set subcommands - ordered as per the list above so we
# can iterate over both sets in an array
PROP_VALS="\
- posixacl on \
+ posix on \
fletcher2 on on \
on legacy none on \
128K none on \
@@ -49,7 +49,7 @@ if is_linux; then
# these are an alternate set of property values
PROP_ALTVALS="\
- noacl off \
+ off off \
fletcher4 lzjb off \
off /tmp/zfstest 100M off \
512 10m off \
@@ -66,7 +66,7 @@ elif is_freebsd; then
# zfs get/set subcommands - ordered as per the list above so we
# can iterate over both sets in an array
PROP_VALS="\
- posixacl on \
+ posix on \
fletcher2 on on \
on legacy none on \
128K none on \
@@ -74,7 +74,7 @@ elif is_freebsd; then
# these are an alternate set of property values
PROP_ALTVALS="\
- noacl off \
+ off off \
fletcher4 lzjb off \
off /tmp/zfstest 100M off \
512 10m off \
diff --git a/tests/zfs-tests/tests/functional/history/history_002_pos.ksh b/tests/zfs-tests/tests/functional/history/history_002_pos.ksh
index 33fa33a4f516..b077603e828f 100755
--- a/tests/zfs-tests/tests/functional/history/history_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/history/history_002_pos.ksh
@@ -72,8 +72,8 @@ props=(
mountpoint /history.$$ mountpoint legacy
mountpoint none compression lz4
compression on compression off
- compression lzjb acltype noacl
- acltype posixacl xattr sa
+ compression lzjb acltype off
+ acltype posix xattr sa
atime on atime off
devices on devices off
exec on exec off
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
index 0441f7ff32be..499c05fc9835 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
@@ -116,7 +116,7 @@ for fs in "$POOL" "$POOL/pclone" "$POOL/$FS" "$POOL/$FS/fs1" \
"$POOL/$FS/fs1/fs2" "$POOL/$FS/fs1/fclone" ; do
rand_set_prop $fs aclinherit "discard" "noallow" "secure" "passthrough"
rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256"
- rand_set_prop $fs acltype "off" "noacl" "posixacl"
+ rand_set_prop $fs acltype "off" "posix" "noacl" "posixacl"
rand_set_prop $fs atime "on" "off"
rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256"
rand_set_prop $fs compression "${compress_prop_vals[@]}"