From b828cf1d01d291de2ab43f207f243898c763f7a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 14 May 2021 04:17:31 +0200 Subject: zed-functions.sh: zed_lock(): don't truncate lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By appending instead of truncating, we can lock on any file (with write permissions) instead of only dedicated lock files, since the locking process itself no longer alters the file in any way Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12042 --- cmd/zed/zed.d/zed-functions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh index c4ed5aa8ac7a..9f8531d737a6 100644 --- a/cmd/zed/zed.d/zed-functions.sh +++ b/cmd/zed/zed.d/zed-functions.sh @@ -126,7 +126,7 @@ zed_lock() # Obtain a lock on the file bound to the given file descriptor. # - eval "exec ${fd}> '${lockfile}'" + eval "exec ${fd}>> '${lockfile}'" if ! err="$(flock --exclusive "${fd}" 2>&1)"; then zed_log_err "failed to lock \"${lockfile}\": ${err}" fi -- cgit v1.2.3 From e20c9330d7b68c41a4e9b19dc6bd4760b6052b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 14 May 2021 04:18:20 +0200 Subject: zed.d/all-debug.sh: simplify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By locking the log file itself, we can omit arduous rebinding and explicit umask setting, but, perhaps more importantly, avoid permanently littering /var/lock/ with zed.debug.log.lock we will never delete It is imperative that the previous commit ("zed-functions.sh: zed_lock(): don't truncate lock") be included in any series that contains this one Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12042 --- cmd/zed/zed.d/Makefile.am | 3 +++ cmd/zed/zed.d/all-debug.sh | 16 ++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am index 3eece353ef90..2c8173b3e769 100644 --- a/cmd/zed/zed.d/Makefile.am +++ b/cmd/zed/zed.d/Makefile.am @@ -52,3 +52,6 @@ install-data-hook: ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ done chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" + +# False positive: 1>&"${ZED_FLOCK_FD}" looks suspiciously similar to a >&filename bash extension +CHECKBASHISMS_IGNORE = -e 'should be >word 2>&1' -e '&"$${ZED_FLOCK_FD}"' diff --git a/cmd/zed/zed.d/all-debug.sh b/cmd/zed/zed.d/all-debug.sh index 14b39caacd9d..824c9fe423d7 100755 --- a/cmd/zed/zed.d/all-debug.sh +++ b/cmd/zed/zed.d/all-debug.sh @@ -12,15 +12,11 @@ zed_exit_if_ignoring_this_event -lockfile="$(basename -- "${ZED_DEBUG_LOG}").lock" +zed_lock "${ZED_DEBUG_LOG}" +{ + printenv | sort + echo +} 1>&"${ZED_FLOCK_FD}" +zed_unlock "${ZED_DEBUG_LOG}" -umask 077 -zed_lock "${lockfile}" -exec >> "${ZED_DEBUG_LOG}" - -printenv | sort -echo - -exec >&- -zed_unlock "${lockfile}" exit 0 -- cgit v1.2.3 From 1fcfc21cd8cce3d44b2526f262be5c73e4d8621a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 14 May 2021 04:22:34 +0200 Subject: zed.d/history_event-zfs-list-cacher.sh.in: parallelise, simplify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This: (a) improves the error log message, (b) locks per pool instead of globally, (c) locks the actual output file instead of /var/lock/zfs-list, which would otherwise linger there forever 
(well, still will, but you can remove it and it won't come back), and (d) preserves attributes of the output file instead of reverting them to 0:0 644 It is imperative that the previous commit ("zed-functions.sh: zed_lock(): don't truncate lock") be included in any series that contains this one Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12042 --- cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in index 15f0a8ed6189..db40fa36d668 100755 --- a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in +++ b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in @@ -3,9 +3,8 @@ # Track changes to enumerated pools for use in early-boot set -ef -FSLIST_DIR="@sysconfdir@/zfs/zfs-list.cache" -FSLIST_TMP="@runstatedir@/zfs-list.cache.new" -FSLIST="${FSLIST_DIR}/${ZEVENT_POOL}" +FSLIST="@sysconfdir@/zfs/zfs-list.cache/${ZEVENT_POOL}" +FSLIST_TMP="@runstatedir@/zfs-list.cache@${ZEVENT_POOL}" # If the pool specific cache file is not writeable, abort [ -w "${FSLIST}" ] || exit 0 @@ -19,15 +18,15 @@ zed_check_cmd "${ZFS}" sort diff # If we are acting on a snapshot, we have nothing to do [ "${ZEVENT_HISTORY_DSNAME%@*}" = "${ZEVENT_HISTORY_DSNAME}" ] || exit 0 -# We obtain a lock on zfs-list to avoid any simultaneous writes. +# We lock the output file to avoid simultaneous writes. # If we run into trouble, log and drop the lock abort_alter() { - zed_log_msg "Error updating zfs-list.cache!" - zed_unlock zfs-list + zed_log_msg "Error updating zfs-list.cache for ${ZEVENT_POOL}!" + zed_unlock "${FSLIST}" } finished() { - zed_unlock zfs-list + zed_unlock "${FSLIST}" trap - EXIT exit 0 } @@ -37,7 +36,7 @@ case "${ZEVENT_HISTORY_INTERNAL_NAME}" in ;; export) - zed_lock zfs-list + zed_lock "${FSLIST}" trap abort_alter EXIT echo > "${FSLIST}" finished @@ -63,7 +62,7 @@ case "${ZEVENT_HISTORY_INTERNAL_NAME}" in ;; esac -zed_lock zfs-list +zed_lock "${FSLIST}" trap abort_alter EXIT PROPS="name,mountpoint,canmount,atime,relatime,devices,exec\ @@ -79,7 +78,7 @@ PROPS="name,mountpoint,canmount,atime,relatime,devices,exec\ sort "${FSLIST_TMP}" -o "${FSLIST_TMP}" # Don't modify the file if it hasn't changed -diff -q "${FSLIST_TMP}" "${FSLIST}" || mv "${FSLIST_TMP}" "${FSLIST}" +diff -q "${FSLIST_TMP}" "${FSLIST}" || cat "${FSLIST_TMP}" > "${FSLIST}" rm -f "${FSLIST_TMP}" finished -- cgit v1.2.3 From 08cd0717359b1a18693e3c8e6d6e5a2819b35a48 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Tue, 8 Jun 2021 20:20:16 -0400 Subject: Correct a flaw in the Python 3 version checking It turns out the ax_python_devel.m4 version check assumes that ("3.X+1.0" >= "3.X.0") is True in Python, which is not when X+1 is 10 or above and X is not. (Also presumably X+1=100 and ...) So let's remake the check to behave consistently, using the "packaging" or (if absent) the "distlib" modules. (Also, update the Github workflows to use the new packages.) 
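For illustration (an editor's sketch, not part of the commit): plain string comparison sorts "3.10.0" before "3.9.0", while a real version parser such as the "packaging" module compares the components numerically:

    $ python3 -c 'print("3.10.0" >= "3.9.0")'
    False
    $ python3 -c 'from packaging import version; print(version.parse("3.10.0") >= version.parse("3.9.0"))'
    True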
Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Rich Ercolani Closes: #12073 --- .github/workflows/zfs-tests-functional.yml | 3 ++- .github/workflows/zfs-tests-sanity.yml | 3 ++- .github/workflows/zloop.yml | 4 ++-- config/always-pyzfs.m4 | 15 ++++++++++++++ config/ax_python_devel.m4 | 33 ++++++++++++++++++++++++------ rpm/generic/zfs.spec.in | 5 +++++ 6 files changed, 53 insertions(+), 10 deletions(-) diff --git a/.github/workflows/zfs-tests-functional.yml b/.github/workflows/zfs-tests-functional.yml index d2b5764dbf80..eacc95ae1617 100644 --- a/.github/workflows/zfs-tests-functional.yml +++ b/.github/workflows/zfs-tests-functional.yml @@ -26,7 +26,8 @@ jobs: xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \ libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \ libpam0g-dev pamtester python-dev python-setuptools python-cffi \ - python3 python3-dev python3-setuptools python3-cffi libcurl4-openssl-dev + python-packaging python3 python3-dev python3-setuptools python3-cffi \ + libcurl4-openssl-dev python3-packaging - name: Autogen.sh run: | sh autogen.sh diff --git a/.github/workflows/zfs-tests-sanity.yml b/.github/workflows/zfs-tests-sanity.yml index 9e2ed1b2f7cf..40a7f8ba511c 100644 --- a/.github/workflows/zfs-tests-sanity.yml +++ b/.github/workflows/zfs-tests-sanity.yml @@ -22,7 +22,8 @@ jobs: xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \ libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \ libpam0g-dev pamtester python-dev python-setuptools python-cffi \ - python3 python3-dev python3-setuptools python3-cffi libcurl4-openssl-dev + python-packaging python3 python3-dev python3-setuptools python3-cffi \ + python3-packaging libcurl4-openssl-dev - name: Autogen.sh run: | sh autogen.sh diff --git a/.github/workflows/zloop.yml b/.github/workflows/zloop.yml index 30785b14507a..b3679e7f7f20 100644 --- a/.github/workflows/zloop.yml +++ b/.github/workflows/zloop.yml @@ -22,8 +22,8 @@ jobs: xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \ libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \ libpam0g-dev \ - python-dev python-setuptools python-cffi \ - python3 python3-dev python3-setuptools python3-cffi + python-dev python-setuptools python-cffi python-packaging \ + python3 python3-dev python3-setuptools python3-cffi python3-packaging - name: Autogen.sh run: | sh autogen.sh diff --git a/config/always-pyzfs.m4 b/config/always-pyzfs.m4 index 76e07b593df2..fa39fd88519c 100644 --- a/config/always-pyzfs.m4 +++ b/config/always-pyzfs.m4 @@ -46,6 +46,21 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ]) AC_SUBST(DEFINE_PYZFS) + dnl # + dnl # Python "packaging" (or, failing that, "distlib") module is required to build and install pyzfs + dnl # + AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [ + ZFS_AC_PYTHON_MODULE([packaging], [], [ + ZFS_AC_PYTHON_MODULE([distlib], [], [ + AS_IF([test "x$enable_pyzfs" = xyes], [ + AC_MSG_ERROR("Python $PYTHON_VERSION packaging and distlib modules are not installed") + ], [test "x$enable_pyzfs" != xno], [ + enable_pyzfs=no + ]) + ]) + ]) + ]) + dnl # dnl # Require python-devel libraries dnl # diff --git a/config/ax_python_devel.m4 b/config/ax_python_devel.m4 index faf6c2b0d7ef..fcf73dc20880 100644 --- a/config/ax_python_devel.m4 +++ b/config/ax_python_devel.m4 @@ -97,9 +97,18 @@ AC_DEFUN([AX_PYTHON_DEVEL],[ # Check for a version of Python >= 2.1.0 # AC_MSG_CHECKING([for a version of Python >= '2.1.0']) - ac_supports_python_ver=`$PYTHON -c "import sys; \ - ver = 
sys.version.split ()[[0]]; \ - print (ver >= '2.1.0')"` + ac_supports_python_ver=`cat<= '3.11.0' + ac_supports_python_ver=`cat<= 8 || 0%{?centos} >= 8 || 0%{?fedora} >= 28 +BuildRequires: python3-packaging +%else +BuildRequires: python-packaging +%endif BuildRequires: python%{__python_pkg_version}-devel BuildRequires: %{__python_cffi_pkg} BuildRequires: %{__python_setuptools_pkg} -- cgit v1.2.3 From 88af959b24301e6fb39ce6ae8b66bdb7e817c710 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 9 Jun 2021 12:21:24 -0700 Subject: Fix minor shellcheck 0.7.2 warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first warning of a misspelling is a false positive, so we annotate the script accordingly. As for the x-prefix warnings update the check to use the conventional '[ -z ]' syntax. all-syslog.sh:46:47: warning: Possible misspelling: ZEVENT_ZIO_OBJECT may not be assigned, but ZEVENT_ZIO_OBJSET is. [SC2153] make_gitrev.sh:53:6: note: Avoid x-prefix in comparisons as it no longer serves a purpose [SC2268] man-dates.sh:10:7: note: Avoid x-prefix in comparisons as it no longer serves a purpose [SC2268] Reviewed-by: Ahelenia Ziemiańska Reviewed-by: John Kennedy Signed-off-by: Brian Behlendorf Closes #12208 --- cmd/zed/zed.d/all-syslog.sh | 1 + scripts/make_gitrev.sh | 2 +- scripts/man-dates.sh | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/zed/zed.d/all-syslog.sh b/cmd/zed/zed.d/all-syslog.sh index 270b1bc67e5c..b07cf0f295ad 100755 --- a/cmd/zed/zed.d/all-syslog.sh +++ b/cmd/zed/zed.d/all-syslog.sh @@ -42,6 +42,7 @@ fi msg="${msg} delay=$((ZEVENT_ZIO_DELAY / 1000000))ms" # list the bookmark data together +# shellcheck disable=SC2153 [ -n "${ZEVENT_ZIO_OBJSET}" ] && \ msg="${msg} bookmark=${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}:${ZEVENT_ZIO_LEVEL}:${ZEVENT_ZIO_BLKID}" diff --git a/scripts/make_gitrev.sh b/scripts/make_gitrev.sh index da21455332ab..e7f4ce8844d5 100755 --- a/scripts/make_gitrev.sh +++ b/scripts/make_gitrev.sh @@ -50,7 +50,7 @@ esac ZFS_GITREV=$({ cd "${top_srcdir}" && git describe --always --long --dirty 2>/dev/null; } || :) -if [ "x${ZFS_GITREV}" = x ] +if [ -z "${ZFS_GITREV}" ] then # If the source directory is not a git repository, check if the file # already exists (in the source) diff --git a/scripts/man-dates.sh b/scripts/man-dates.sh index 186d94639a56..39f1b5fb1324 100755 --- a/scripts/man-dates.sh +++ b/scripts/man-dates.sh @@ -7,6 +7,6 @@ set -eu find man -type f | while read -r i ; do git_date=$(git log -1 --date=short --format="%ad" -- "$i") - [ "x$git_date" = "x" ] && continue + [ -z "$git_date" ] && continue sed -i "s|^\.Dd.*|.Dd $(date -d "$git_date" "+%B %-d, %Y")|" "$i" done -- cgit v1.2.3 From b0f3e8a6ebe10a9098c7a984ae14c6fc9b0e0d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Mon, 31 May 2021 14:10:11 +0200 Subject: man: use one Makefile, use OpenZFS for .Os MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prevailing style is to use either nothing, or the originating organisational umbrella (here: OpenZFS), and these aren't Linux manpages This also deduplicates the substitution code, and makes adding/removing sexions simpler in future Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12212 --- configure.ac | 3 -- man/Makefile.am | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++- man/man1/Makefile.am | 12 ------ man/man5/Makefile.am | 16 ------- 
man/man8/Makefile.am | 102 -------------------------------------------- 5 files changed, 116 insertions(+), 134 deletions(-) delete mode 100644 man/man1/Makefile.am delete mode 100644 man/man5/Makefile.am delete mode 100644 man/man8/Makefile.am diff --git a/configure.ac b/configure.ac index 077ad7c43f4d..27409c82f396 100644 --- a/configure.ac +++ b/configure.ac @@ -171,9 +171,6 @@ AC_CONFIG_FILES([ lib/libzstd/Makefile lib/libzutil/Makefile man/Makefile - man/man1/Makefile - man/man5/Makefile - man/man8/Makefile module/Kbuild module/Makefile module/avl/Makefile diff --git a/man/Makefile.am b/man/Makefile.am index 841cb9c4e6a0..4ad4aeaa406e 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -1 +1,116 @@ -SUBDIRS = man1 man5 man8 +include $(top_srcdir)/config/Substfiles.am + +EXTRA_DIST += \ + man1/cstyle.1 + +dist_man_MANS = \ + man1/zhack.1 \ + man1/ztest.1 \ + man1/raidz_test.1 \ + man1/zvol_wait.1 \ + man1/arcstat.1 \ + \ + man5/vdev_id.conf.5 \ + man5/zpool-features.5 \ + man5/spl-module-parameters.5 \ + man5/zfs-module-parameters.5 \ + man5/zfs-events.5 \ + \ + man8/fsck.zfs.8 \ + man8/mount.zfs.8 \ + man8/vdev_id.8 \ + man8/zdb.8 \ + man8/zfs.8 \ + man8/zfsconcepts.8 \ + man8/zfsprops.8 \ + man8/zfs-allow.8 \ + man8/zfs-bookmark.8 \ + man8/zfs-change-key.8 \ + man8/zfs-clone.8 \ + man8/zfs-create.8 \ + man8/zfs-destroy.8 \ + man8/zfs-diff.8 \ + man8/zfs-get.8 \ + man8/zfs-groupspace.8 \ + man8/zfs-hold.8 \ + man8/zfs-inherit.8 \ + man8/zfs-jail.8 \ + man8/zfs-list.8 \ + man8/zfs-load-key.8 \ + man8/zfs-mount.8 \ + man8/zfs-program.8 \ + man8/zfs-project.8 \ + man8/zfs-projectspace.8 \ + man8/zfs-promote.8 \ + man8/zfs-receive.8 \ + man8/zfs-recv.8 \ + man8/zfs-redact.8 \ + man8/zfs-release.8 \ + man8/zfs-rename.8 \ + man8/zfs-rollback.8 \ + man8/zfs-send.8 \ + man8/zfs-set.8 \ + man8/zfs-share.8 \ + man8/zfs-snapshot.8 \ + man8/zfs-unallow.8 \ + man8/zfs-unjail.8 \ + man8/zfs-unload-key.8 \ + man8/zfs-unmount.8 \ + man8/zfs-upgrade.8 \ + man8/zfs-userspace.8 \ + man8/zfs-wait.8 \ + man8/zfs_ids_to_path.8 \ + man8/zgenhostid.8 \ + man8/zinject.8 \ + man8/zpool.8 \ + man8/zpoolconcepts.8 \ + man8/zpoolprops.8 \ + man8/zpool-add.8 \ + man8/zpool-attach.8 \ + man8/zpool-checkpoint.8 \ + man8/zpool-clear.8 \ + man8/zpool-create.8 \ + man8/zpool-destroy.8 \ + man8/zpool-detach.8 \ + man8/zpool-events.8 \ + man8/zpool-export.8 \ + man8/zpool-get.8 \ + man8/zpool-history.8 \ + man8/zpool-import.8 \ + man8/zpool-initialize.8 \ + man8/zpool-iostat.8 \ + man8/zpool-labelclear.8 \ + man8/zpool-list.8 \ + man8/zpool-offline.8 \ + man8/zpool-online.8 \ + man8/zpool-reguid.8 \ + man8/zpool-remove.8 \ + man8/zpool-reopen.8 \ + man8/zpool-replace.8 \ + man8/zpool-resilver.8 \ + man8/zpool-scrub.8 \ + man8/zpool-set.8 \ + man8/zpool-split.8 \ + man8/zpool-status.8 \ + man8/zpool-sync.8 \ + man8/zpool-trim.8 \ + man8/zpool-upgrade.8 \ + man8/zpool-wait.8 \ + man8/zstream.8 \ + man8/zstreamdump.8 \ + man8/zpool_influxdb.8 + +nodist_man_MANS = \ + man8/zed.8 \ + man8/zfs-mount-generator.8 + +SUBSTFILES += $(nodist_man_MANS) + + +if BUILD_LINUX +# The manual pager in most Linux distros defaults to "BSD" when .Os is blank, +# but leaving it blank makes things a lot easier on +# FreeBSD when OpenZFS is vendored in the base system. 
+install-data-hook: + cd $(DESTDIR)$(mandir) && $(SED) ${ac_inplace} -e 's/^\.Os$$/.Os OpenZFS/' $(dist_man_MANS) $(nodist_man_MANS) +endif diff --git a/man/man1/Makefile.am b/man/man1/Makefile.am deleted file mode 100644 index 8d7457a3e258..000000000000 --- a/man/man1/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1 arcstat.1 -EXTRA_DIST = cstyle.1 - -if BUILD_LINUX -# The man pager in most Linux distros defaults to BSD instead of Linux -# when .Os is blank, but leaving it blank makes things a lot easier on -# FreeBSD when OpenZFS is vendored in the base system. -install-data-hook: - cd $(DESTDIR)$(mandir)/man1; \ - $(SED) ${ac_inplace} -e 's/^\.Os$$/.Os Linux/' \ - $(dist_man_MANS) -endif diff --git a/man/man5/Makefile.am b/man/man5/Makefile.am deleted file mode 100644 index 9cbb2c08f378..000000000000 --- a/man/man5/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -dist_man_MANS = \ - vdev_id.conf.5 \ - zpool-features.5 \ - spl-module-parameters.5 \ - zfs-module-parameters.5 \ - zfs-events.5 - -if BUILD_LINUX -# The man pager in most Linux distros defaults to BSD instead of Linux -# when .Os is blank, but leaving it blank makes things a lot easier on -# FreeBSD when OpenZFS is vendored in the base system. -install-data-hook: - cd $(DESTDIR)$(mandir)/man5; \ - $(SED) ${ac_inplace} -e 's/^\.Os$$/.Os Linux/' \ - $(dist_man_MANS) -endif diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am deleted file mode 100644 index 602645180beb..000000000000 --- a/man/man8/Makefile.am +++ /dev/null @@ -1,102 +0,0 @@ -include $(top_srcdir)/config/Substfiles.am - -dist_man_MANS = \ - fsck.zfs.8 \ - mount.zfs.8 \ - vdev_id.8 \ - zdb.8 \ - zfs.8 \ - zfsconcepts.8 \ - zfsprops.8 \ - zfs-allow.8 \ - zfs-bookmark.8 \ - zfs-change-key.8 \ - zfs-clone.8 \ - zfs-create.8 \ - zfs-destroy.8 \ - zfs-diff.8 \ - zfs-get.8 \ - zfs-groupspace.8 \ - zfs-hold.8 \ - zfs-inherit.8 \ - zfs-jail.8 \ - zfs-list.8 \ - zfs-load-key.8 \ - zfs-mount.8 \ - zfs-program.8 \ - zfs-project.8 \ - zfs-projectspace.8 \ - zfs-promote.8 \ - zfs-receive.8 \ - zfs-recv.8 \ - zfs-redact.8 \ - zfs-release.8 \ - zfs-rename.8 \ - zfs-rollback.8 \ - zfs-send.8 \ - zfs-set.8 \ - zfs-share.8 \ - zfs-snapshot.8 \ - zfs-unallow.8 \ - zfs-unjail.8 \ - zfs-unload-key.8 \ - zfs-unmount.8 \ - zfs-upgrade.8 \ - zfs-userspace.8 \ - zfs-wait.8 \ - zfs_ids_to_path.8 \ - zgenhostid.8 \ - zinject.8 \ - zpool.8 \ - zpoolconcepts.8 \ - zpoolprops.8 \ - zpool-add.8 \ - zpool-attach.8 \ - zpool-checkpoint.8 \ - zpool-clear.8 \ - zpool-create.8 \ - zpool-destroy.8 \ - zpool-detach.8 \ - zpool-events.8 \ - zpool-export.8 \ - zpool-get.8 \ - zpool-history.8 \ - zpool-import.8 \ - zpool-initialize.8 \ - zpool-iostat.8 \ - zpool-labelclear.8 \ - zpool-list.8 \ - zpool-offline.8 \ - zpool-online.8 \ - zpool-reguid.8 \ - zpool-remove.8 \ - zpool-reopen.8 \ - zpool-replace.8 \ - zpool-resilver.8 \ - zpool-scrub.8 \ - zpool-set.8 \ - zpool-split.8 \ - zpool-status.8 \ - zpool-sync.8 \ - zpool-trim.8 \ - zpool-upgrade.8 \ - zpool-wait.8 \ - zstream.8 \ - zstreamdump.8 \ - zpool_influxdb.8 - -nodist_man_MANS = \ - zed.8 \ - zfs-mount-generator.8 - -SUBSTFILES += $(nodist_man_MANS) - -if BUILD_LINUX -# The man pager in most Linux distros defaults to BSD instead of Linux -# when .Os is blank, but leaving it blank makes things a lot easier on -# FreeBSD when OpenZFS is vendored in the base system. 
-install-data-hook: - cd $(DESTDIR)$(mandir)/man8; \ - $(SED) ${ac_inplace} -e 's/^\.Os$$/.Os Linux/' \ - $(dist_man_MANS) $(nodist_man_MANS) -endif -- cgit v1.2.3 From 2badb3457ad396b3c2d282d7a9eae90259b15a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 4 Jun 2021 22:29:26 +0200 Subject: Move properties, parameters, events, and concepts around manual sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pages moved as follows: zpool-features.{5 => 7} spl{-module-parameters.5 => .4} zfs{-module-parameters.5 => .4} zfs-events.5 => into zpool-events.8 zfsconcepts.{8 => 7} zfsprops.{8 => 7} zpoolconcepts.{8 => 7} zpoolprops.{8 => 7} Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Co-authored-by: Daniel Ebdrup Jensen Closes #12149 Closes #12212 --- cmd/zpool/zpool_main.c | 6 +- .../ZFS-pool-latency-heatmaps-influxdb.json | 4 +- man/Makefile.am | 17 +- man/man1/zhack.1 | 2 +- man/man1/ztest.1 | 2 +- man/man4/spl.4 | 195 ++ man/man4/zfs.4 | 2380 ++++++++++++++++++++ man/man5/spl-module-parameters.5 | 196 -- man/man5/zfs-events.5 | 448 ---- man/man5/zfs-module-parameters.5 | 2379 ------------------- man/man5/zpool-features.5 | 842 ------- man/man7/zfsconcepts.7 | 206 ++ man/man7/zfsprops.7 | 2067 +++++++++++++++++ man/man7/zpool-features.7 | 842 +++++++ man/man7/zpoolconcepts.7 | 512 +++++ man/man7/zpoolprops.7 | 412 ++++ man/man8/mount.zfs.8 | 2 +- man/man8/zed.8.in | 1 - man/man8/zfs-bookmark.8 | 2 +- man/man8/zfs-clone.8 | 2 +- man/man8/zfs-create.8 | 2 +- man/man8/zfs-jail.8 | 4 +- man/man8/zfs-list.8 | 8 +- man/man8/zfs-load-key.8 | 4 +- man/man8/zfs-mount-generator.8.in | 4 +- man/man8/zfs-mount.8 | 2 +- man/man8/zfs-receive.8 | 2 +- man/man8/zfs-send.8 | 8 +- man/man8/zfs-set.8 | 12 +- man/man8/zfs-share.8 | 2 +- man/man8/zfs-snapshot.8 | 2 +- man/man8/zfs-upgrade.8 | 2 +- man/man8/zfs-userspace.8 | 4 +- man/man8/zfs.8 | 10 +- man/man8/zfsconcepts.8 | 206 -- man/man8/zfsprops.8 | 2067 ----------------- man/man8/zgenhostid.8 | 2 +- man/man8/zpool-add.8 | 4 +- man/man8/zpool-attach.8 | 2 +- man/man8/zpool-create.8 | 14 +- man/man8/zpool-events.8 | 422 +++- man/man8/zpool-get.8 | 10 +- man/man8/zpool-import.8 | 4 +- man/man8/zpool-list.8 | 2 +- man/man8/zpool-remove.8 | 2 +- man/man8/zpool-replace.8 | 2 +- man/man8/zpool-split.8 | 2 +- man/man8/zpool-status.8 | 2 +- man/man8/zpool-sync.8 | 4 +- man/man8/zpool-trim.8 | 4 +- man/man8/zpool-upgrade.8 | 12 +- man/man8/zpool.8 | 21 +- man/man8/zpoolconcepts.8 | 512 ----- man/man8/zpoolprops.8 | 412 ---- rpm/generic/zfs.spec.in | 2 + scripts/zol2zfs-patch.sed | 2 +- .../tests/functional/l2arc/l2arc_mfuonly_pos.ksh | 2 +- 57 files changed, 7128 insertions(+), 7167 deletions(-) create mode 100644 man/man4/spl.4 create mode 100644 man/man4/zfs.4 delete mode 100644 man/man5/spl-module-parameters.5 delete mode 100644 man/man5/zfs-events.5 delete mode 100644 man/man5/zfs-module-parameters.5 delete mode 100644 man/man5/zpool-features.5 create mode 100644 man/man7/zfsconcepts.7 create mode 100644 man/man7/zfsprops.7 create mode 100644 man/man7/zpool-features.7 create mode 100644 man/man7/zpoolconcepts.7 create mode 100644 man/man7/zpoolprops.7 delete mode 100644 man/man8/zfsconcepts.8 delete mode 100644 man/man8/zfsprops.8 delete mode 100644 man/man8/zpoolconcepts.8 delete mode 100644 man/man8/zpoolprops.8 diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 02415b157935..35a59710c05e 100644 --- a/cmd/zpool/zpool_main.c 
+++ b/cmd/zpool/zpool_main.c @@ -533,7 +533,7 @@ usage(boolean_t requested) (void) fprintf(fp, "YES disabled | enabled | active\n"); (void) fprintf(fp, gettext("\nThe feature@ properties must be " - "appended with a feature name.\nSee zpool-features(5).\n")); + "appended with a feature name.\nSee zpool-features(7).\n")); } /* @@ -8248,7 +8248,7 @@ status_callback(zpool_handle_t *zhp, void *data) printf_color(ANSI_YELLOW, gettext("Enable all features using " "'zpool upgrade'. Once this is done,\n\tthe pool may no " "longer be accessible by software that does not support\n\t" - "the features. See zpool-features(5) for details.\n")); + "the features. See zpool-features(7) for details.\n")); break; case ZPOOL_STATUS_COMPATIBILITY_ERR: @@ -8951,7 +8951,7 @@ upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) "pool may become incompatible with " "software\nthat does not support " "the feature. See " - "zpool-features(5) for " + "zpool-features(7) for " "details.\n\n" "Note that the pool " "'compatibility' feature can be " diff --git a/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json b/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json index a99f92783bc4..70260ae40814 100644 --- a/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json +++ b/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json @@ -1360,7 +1360,7 @@ "type": "row" }, { - "content": "I/O requests that are satisfied by accessing pool devices are managed by the ZIO scheduler.\nThe total latency is measured from the start of the I/O to completion by the disk.\nLatency through each queue is shown prior to its submission to the disk queue.\n\nThis view is useful for observing the effects of tuning the ZIO scheduler min and max values\n(see zfs-module-parameters(5) and [ZFS on Linux Module Parameters](https://openzfs.github.io/openzfs-docs/Performance%20and%20tuning/ZFS%20on%20Linux%20Module%20Parameters.html)):\n+ *zfs_vdev_max_active* controls the ZIO scheduler's disk queue depth (do not confuse with the block device's nr_requests)\n+ *zfs_vdev_sync_read_min_active* and *zfs_vdev_sync_read_max_active* control the synchronous queue for reads: most reads are sync\n+ *zfs_vdev_sync_write_min_active* and *zfs_vdev_sync_write_max_active* control the synchronous queue for writes: \nusually metadata or user data depending on the \"sync\" property setting or I/Os that are requested to be flushed\n+ *zfs_vdev_async_read_min_active* and *zfs_vdev_async_read_max_active* control the asynchronous queue for reads: usually prefetches\n+ *zfs_vdev_async_write_min_active* and *zfs_vdev_async_write_max_active* control the asynchronous queue for writes: \nusually the bulk of all writes at transaction group (txg) commit\n+ *zfs_vdev_scrub_min_active* and *zfs_vdev_scrub_max_active* controls the scan reads: usually scrub or resilver\n\n", + "content": "I/O requests that are satisfied by accessing pool devices are managed by the ZIO scheduler.\nThe total latency is measured from the start of the I/O to completion by the disk.\nLatency through each queue is shown prior to its submission to the disk queue.\n\nThis view is useful for observing the effects of tuning the ZIO scheduler min and max values\n(see zfs(4) and [ZFS on Linux Module Parameters](https://openzfs.github.io/openzfs-docs/Performance%20and%20tuning/ZFS%20on%20Linux%20Module%20Parameters.html)):\n+ *zfs_vdev_max_active* controls the ZIO scheduler's disk queue depth (do not confuse with the 
block device's nr_requests)\n+ *zfs_vdev_sync_read_min_active* and *zfs_vdev_sync_read_max_active* control the synchronous queue for reads: most reads are sync\n+ *zfs_vdev_sync_write_min_active* and *zfs_vdev_sync_write_max_active* control the synchronous queue for writes: \nusually metadata or user data depending on the \"sync\" property setting or I/Os that are requested to be flushed\n+ *zfs_vdev_async_read_min_active* and *zfs_vdev_async_read_max_active* control the asynchronous queue for reads: usually prefetches\n+ *zfs_vdev_async_write_min_active* and *zfs_vdev_async_write_max_active* control the asynchronous queue for writes: \nusually the bulk of all writes at transaction group (txg) commit\n+ *zfs_vdev_scrub_min_active* and *zfs_vdev_scrub_max_active* controls the scan reads: usually scrub or resilver\n\n", "datasource": "${DS_MACBOOK-INFLUX}", "fieldConfig": { "defaults": { @@ -1664,4 +1664,4 @@ "list": [] }, "version": 2 -} \ No newline at end of file +} diff --git a/man/Makefile.am b/man/Makefile.am index 4ad4aeaa406e..8ab1b757242c 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -11,18 +11,21 @@ dist_man_MANS = \ man1/arcstat.1 \ \ man5/vdev_id.conf.5 \ - man5/zpool-features.5 \ - man5/spl-module-parameters.5 \ - man5/zfs-module-parameters.5 \ - man5/zfs-events.5 \ + \ + man4/spl.4 \ + man4/zfs.4 \ + \ + man7/zpool-features.7 \ + man7/zfsconcepts.7 \ + man7/zfsprops.7 \ + man7/zpoolconcepts.7 \ + man7/zpoolprops.7 \ \ man8/fsck.zfs.8 \ man8/mount.zfs.8 \ man8/vdev_id.8 \ man8/zdb.8 \ man8/zfs.8 \ - man8/zfsconcepts.8 \ - man8/zfsprops.8 \ man8/zfs-allow.8 \ man8/zfs-bookmark.8 \ man8/zfs-change-key.8 \ @@ -63,8 +66,6 @@ dist_man_MANS = \ man8/zgenhostid.8 \ man8/zinject.8 \ man8/zpool.8 \ - man8/zpoolconcepts.8 \ - man8/zpoolprops.8 \ man8/zpool-add.8 \ man8/zpool-attach.8 \ man8/zpool-checkpoint.8 \ diff --git a/man/man1/zhack.1 b/man/man1/zhack.1 index e2fc189b4d0d..b18b3a4c0996 100644 --- a/man/man1/zhack.1 +++ b/man/man1/zhack.1 @@ -138,5 +138,5 @@ descriptions_obj: . .Sh SEE ALSO .Xr ztest 1 , -.Xr zpool-features 5 , +.Xr zpool-features 7 , .Xr zfs 8 diff --git a/man/man1/ztest.1 b/man/man1/ztest.1 index 459486c286bf..fd1374a2f106 100644 --- a/man/man1/ztest.1 +++ b/man/man1/ztest.1 @@ -230,4 +230,4 @@ By default the stack size is limited to .Xr zdb 1 , .Xr zfs 1 , .Xr zpool 1 , -.Xr spl-module-parameters 5 +.Xr spl 4 diff --git a/man/man4/spl.4 b/man/man4/spl.4 new file mode 100644 index 000000000000..11cde14ae5ca --- /dev/null +++ b/man/man4/spl.4 @@ -0,0 +1,195 @@ +.\" +.\" The contents of this file are subject to the terms of the Common Development +.\" and Distribution License (the "License"). You may not use this file except +.\" in compliance with the License. You can obtain a copy of the license at +.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. +.\" +.\" See the License for the specific language governing permissions and +.\" limitations under the License. When distributing Covered Code, include this +.\" CDDL HEADER in each file and include the License file at +.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this +.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your +.\" own identifying information: +.\" Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" Copyright 2013 Turbo Fredriksson . All rights reserved. +.\" +.Dd August 24, 2020 +.Dt SPL 4 +.Os +. +.Sh NAME +.Nm spl +.Nd parameters of the SPL kernel module +. 
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Sy spl_kmem_cache_kmem_threads Ns = Ns Sy 4 Pq uint
+The number of threads created for the spl_kmem_cache task queue.
+This task queue is responsible for allocating new slabs
+for use by the kmem caches.
+For the majority of systems and workloads only a small number of threads are
+required.
+.
+.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
+When this is set it prevents Linux from being able to rapidly reclaim all the
+memory held by the kmem caches.
+This may be useful in circumstances where it's preferable that Linux
+reclaim memory from some other subsystem first.
+Setting this will increase the likelihood of out-of-memory events on a
+memory-constrained system.
+.
+.It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
+The preferred number of objects per slab in the cache.
+In general, a larger value will increase the cache's memory footprint
+while decreasing the time required to perform an allocation.
+Conversely, a smaller value will minimize the footprint
+and improve cache reclaim time, but individual allocations may take longer.
+.
+.It Sy spl_kmem_cache_max_size Ns = Ns Sy 32 Po 64-bit Pc or Sy 4 Po 32-bit Pc Pq uint
+The maximum size of a kmem cache slab in MiB.
+This effectively limits the maximum cache object size to
+.Sy spl_kmem_cache_max_size Ns / Ns Sy spl_kmem_cache_obj_per_slab .
+.Pp
+Caches may not be created with
+objects sized larger than this limit.
+.
+.It Sy spl_kmem_cache_slab_limit Ns = Ns Sy 16384 Pq uint
+For small objects the Linux slab allocator should be used to make the most
+efficient use of the memory.
+However, large objects are not supported by
+the Linux slab and therefore the SPL implementation is preferred.
+This value is used to determine the cutoff between a small and large object.
+.Pp
+Objects of size
+.Sy spl_kmem_cache_slab_limit
+or smaller will be allocated using the Linux slab allocator;
+large objects use the SPL allocator.
+A cutoff of 16K was determined to be optimal for architectures using 4K pages.
+.
+.It Sy spl_kmem_alloc_warn Ns = Ns Sy 32768 Pq uint
+As a general rule
+.Fn kmem_alloc
+allocations should be small,
+preferably just a few pages, since they must be physically contiguous.
+Therefore, a rate limited warning will be printed to the console for any
+.Fn kmem_alloc
+which exceeds a reasonable threshold.
+.Pp
+The default warning threshold is set to eight pages but capped at 32K to
+accommodate systems using large pages.
+This value was selected to be small enough to ensure
+the largest allocations are quickly noticed and fixed,
+but large enough to avoid logging any warnings when an allocation size is
+larger than optimal but not a serious concern.
+Since this value is tunable, developers are encouraged to set it lower
+when testing so any new largish allocations are quickly caught.
+These warnings may be disabled by setting the threshold to zero.
+.
+.It Sy spl_kmem_alloc_max Ns = Ns Sy KMALLOC_MAX_SIZE Ns / Ns Sy 4 Pq uint
+Large
+.Fn kmem_alloc
+allocations will fail if they exceed
+.Sy KMALLOC_MAX_SIZE .
+Allocations which are marginally smaller than this limit may succeed but
+should still be avoided due to the expense of locating a contiguous range
+of free pages.
+Therefore, a maximum kmem size with a reasonable safety margin of 4x is set.
+.Fn kmem_alloc
+allocations larger than this maximum will quickly fail.
+.Fn vmem_alloc
+allocations less than or equal to this value will use
+.Fn kmalloc ,
+but shift to
+.Fn vmalloc
+when exceeding this value.
+.
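An illustrative aside (an editor's sketch, not part of the patch): like every SPL parameter documented here, these allocation thresholds are exposed under /sys/module/spl/parameters/ at runtime, so they can be inspected and, where the parameter is writable, tuned without reloading the module:

    # cat /sys/module/spl/parameters/spl_kmem_alloc_max
    # echo 65536 > /sys/module/spl/parameters/spl_kmem_alloc_warn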
+.It Sy spl_kmem_cache_magazine_size Ns = Ns Sy 0 Pq uint
+Cache magazines are an optimization designed to minimize the cost of
+allocating memory.
+They do this by keeping a per-cpu cache of recently
+freed objects, which can then be reallocated without taking a lock.
+This can improve performance on highly contended caches.
+However, because objects in magazines will prevent otherwise empty slabs
+from being immediately released, this may not be ideal for low-memory machines.
+.Pp
+For this reason,
+.Sy spl_kmem_cache_magazine_size
+can be used to set a maximum magazine size.
+When this value is set to 0 the magazine size will
+be automatically determined based on the object size.
+Otherwise magazines will be limited to 2-256 objects per magazine (i.e. per CPU).
+Magazines may never be entirely disabled in this implementation.
+.
+.It Sy spl_hostid Ns = Ns Sy 0 Pq ulong
+The system hostid; when set, this can be used to uniquely identify a system.
+By default this value is set to zero, which indicates the hostid is disabled.
+It can be explicitly enabled by placing a unique non-zero value in
+.Pa /etc/hostid .
+.
+.It Sy spl_hostid_path Ns = Ns Pa /etc/hostid Pq charp
+The expected path to locate the system hostid when specified.
+This value may be overridden for non-standard configurations.
+.
+.It Sy spl_panic_halt Ns = Ns Sy 0 Pq uint
+Cause a kernel panic on assertion failures.
+When not enabled, the thread is halted to facilitate further debugging.
+.Pp
+Set to a non-zero value to enable.
+.
+.It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint
+Kick stuck taskqs to spawn threads.
+When writing a non-zero value to it, it will scan all the taskqs.
+If any of them have a pending task more than 5 seconds old,
+it will kick it to spawn more threads.
+This can be used if you find a rare
+deadlock occurs because one or more taskqs didn't spawn a thread when they should have.
+.
+.It Sy spl_taskq_thread_bind Ns = Ns Sy 0 Pq int
+Bind taskq threads to specific CPUs.
+When enabled, all taskq threads will be distributed evenly
+across the available CPUs.
+By default, this behavior is disabled to allow the Linux scheduler
+the maximum flexibility to determine where a thread should run.
+.
+.It Sy spl_taskq_thread_dynamic Ns = Ns Sy 1 Pq int
+Allow dynamic taskqs.
+When enabled, taskqs which set the
+.Sy TASKQ_DYNAMIC
+flag will by default create only a single thread.
+New threads will be created on demand up to a maximum allowed number
+to facilitate the completion of outstanding tasks.
+Threads which are no longer needed will be promptly destroyed.
+By default this behavior is enabled, but it can be disabled to
+aid performance analysis or troubleshooting.
+.
+.It Sy spl_taskq_thread_priority Ns = Ns Sy 1 Pq int
+Allow newly created taskq threads to set a non-default scheduler priority.
+When enabled, the priority specified when a taskq is created will be applied
+to all threads created by that taskq.
+When disabled, all threads will use the default Linux kernel thread priority.
+By default, this behavior is enabled.
+.
+.It Sy spl_taskq_thread_sequential Ns = Ns Sy 4 Pq int
+The number of items a taskq worker thread must handle without interruption
+before requesting a new worker thread be spawned.
+This is used to control
+how quickly taskqs ramp up the number of threads processing the queue.
+Because Linux thread creation and destruction are relatively inexpensive, a
+small default value has been selected.
+This means that normally threads will be created aggressively, which is desirable.
+Increasing this value will
+result in a slower thread creation rate, which may be preferable for some
+configurations.
+.
+.It Sy spl_max_show_tasks Ns = Ns Sy 512 Pq uint
+The maximum number of tasks per pending list in each taskq shown in
+.Pa /proc/spl/taskq{,-all} .
+Write
+.Sy 0
+to turn off the limit.
+The proc file will walk the lists with the lock held,
+so reading it could cause a lock-up if a list grows too large
+without the output being limited.
+"(truncated)" will be shown if the list is larger than the limit.
+.El
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
new file mode 100644
index 000000000000..6da8d42b42bd
--- /dev/null
+++ b/man/man4/zfs.4
@@ -0,0 +1,2380 @@
+.\"
+.\" Copyright (c) 2013 by Turbo Fredriksson . All rights reserved.
+.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
+.\" Copyright (c) 2019 Datto Inc.
+.\" The contents of this file are subject to the terms of the Common Development
+.\" and Distribution License (the "License"). You may not use this file except
+.\" in compliance with the License. You can obtain a copy of the license at
+.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\"
+.\" See the License for the specific language governing permissions and
+.\" limitations under the License. When distributing Covered Code, include this
+.\" CDDL HEADER in each file and include the License file at
+.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this
+.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+.\" own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.Dd June 1, 2021
+.Dt ZFS 4
+.Os
+.
+.Sh NAME
+.Nm zfs
+.Nd tuning of the ZFS kernel module
+.
+.Sh DESCRIPTION
+The ZFS module supports these parameters:
+.Bl -tag -width Ds
+.It Sy dbuf_cache_max_bytes Ns = Ns Sy ULONG_MAX Ns B Pq ulong
+Maximum size in bytes of the dbuf cache.
+The target size is determined by the MIN versus
+.No 1/2^ Ns Sy dbuf_cache_shift Pq 1/32nd
+of the target ARC size.
+The behavior of the dbuf cache and its associated settings
+can be observed via the
+.Pa /proc/spl/kstat/zfs/dbufstats
+kstat.
+.
+.It Sy dbuf_metadata_cache_max_bytes Ns = Ns Sy ULONG_MAX Ns B Pq ulong
+Maximum size in bytes of the metadata dbuf cache.
+The target size is determined by the MIN versus
+.No 1/2^ Ns Sy dbuf_metadata_cache_shift Pq 1/64th
+of the target ARC size.
+The behavior of the metadata dbuf cache and its associated settings
+can be observed via the
+.Pa /proc/spl/kstat/zfs/dbufstats
+kstat.
+.
+.It Sy dbuf_cache_hiwater_pct Ns = Ns Sy 10 Ns % Pq uint
+The percentage over
+.Sy dbuf_cache_max_bytes
+when dbufs must be evicted directly.
+.
+.It Sy dbuf_cache_lowater_pct Ns = Ns Sy 10 Ns % Pq uint
+The percentage below
+.Sy dbuf_cache_max_bytes
+when the evict thread stops evicting dbufs.
+.
+.It Sy dbuf_cache_shift Ns = Ns Sy 5 Pq int
+Set the size of the dbuf cache
+.Pq Sy dbuf_cache_max_bytes
+to a log2 fraction of the target ARC size.
+.
+.It Sy dbuf_metadata_cache_shift Ns = Ns Sy 6 Pq int
+Set the size of the dbuf metadata cache
+.Pq Sy dbuf_metadata_cache_max_bytes
+to a log2 fraction of the target ARC size.
+.
+.It Sy dmu_object_alloc_chunk_shift Ns = Ns Sy 7 Po 128 Pc Pq int
+dnode slots allocated in a single operation as a power of 2.
+The default value minimizes lock contention for the bulk operation performed.
+.
+.It Sy dmu_prefetch_max Ns = Ns Sy 134217728 Ns B Po 128MB Pc Pq int
+Limit the amount we can prefetch with one call to this amount in bytes.
+This helps to limit the amount of memory that can be used by prefetching. +. +.It Sy ignore_hole_birth Pq int +Alias for +.Sy send_holes_without_birth_time . +. +.It Sy l2arc_feed_again Ns = Ns Sy 1 Ns | Ns 0 Pq int +Turbo L2ARC warm-up. +When the L2ARC is cold the fill interval will be set as fast as possible. +. +.It Sy l2arc_feed_min_ms Ns = Ns Sy 200 Pq ulong +Min feed interval in milliseconds. +Requires +.Sy l2arc_feed_again Ns = Ns Ar 1 +and only applicable in related situations. +. +.It Sy l2arc_feed_secs Ns = Ns Sy 1 Pq ulong +Seconds between L2ARC writing. +. +.It Sy l2arc_headroom Ns = Ns Sy 2 Pq ulong +How far through the ARC lists to search for L2ARC cacheable content, +expressed as a multiplier of +.Sy l2arc_write_max . +ARC persistence across reboots can be achieved with persistent L2ARC +by setting this parameter to +.Sy 0 , +allowing the full length of ARC lists to be searched for cacheable content. +. +.It Sy l2arc_headroom_boost Ns = Ns Sy 200 Ns % Pq ulong +Scales +.Sy l2arc_headroom +by this percentage when L2ARC contents are being successfully compressed +before writing. +A value of +.Sy 100 +disables this feature. +. +.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int +Controls whether only MFU metadata and data are cached from ARC into L2ARC. +This may be desired to avoid wasting space on L2ARC when reading/writing large +amounts of data that are not expected to be accessed more than once. +.Pp +The default is off, +meaning both MRU and MFU data and metadata are cached. +When turning off this feature, some MRU buffers will still be present +in ARC and eventually cached on L2ARC. +.No If Sy l2arc_noprefetch Ns = Ns Sy 0 , +some prefetched buffers will be cached to L2ARC, and those might later +transition to MRU, in which case the +.Sy l2arc_mru_asize No arcstat will not be Sy 0 . +.Pp +Regardless of +.Sy l2arc_noprefetch , +some MFU buffers might be evicted from ARC, +accessed later on as prefetches and transition to MRU as prefetches. +If accessed again they are counted as MRU and the +.Sy l2arc_mru_asize No arcstat will not be Sy 0 . +.Pp +The ARC status of L2ARC buffers when they were first cached in +L2ARC can be seen in the +.Sy l2arc_mru_asize , Sy l2arc_mfu_asize , No and Sy l2arc_prefetch_asize +arcstats when importing the pool or onlining a cache +device if persistent L2ARC is enabled. +.Pp +The +.Sy evict_l2_eligible_mru +arcstat does not take into account if this option is enabled as the information +provided by the +.Sy evict_l2_eligible_m[rf]u +arcstats can be used to decide if toggling this option is appropriate +for the current workload. +. +.It Sy l2arc_meta_percent Ns = Ns Sy 33 Ns % Pq int +Percent of ARC size allowed for L2ARC-only headers. +Since L2ARC buffers are not evicted on memory pressure, +too many headers on a system with an irrationally large L2ARC +can render it slow or unusable. +This parameter limits L2ARC writes and rebuilds to achieve the target. +. +.It Sy l2arc_trim_ahead Ns = Ns Sy 0 Ns % Pq ulong +Trims ahead of the current write size +.Pq Sy l2arc_write_max +on L2ARC devices by this percentage of write size if we have filled the device. +If set to +.Sy 100 +we TRIM twice the space required to accommodate upcoming writes. +A minimum of +.Sy 64MB +will be trimmed. +It also enables TRIM of the whole L2ARC device upon creation +or addition to an existing pool or if the header of the device is +invalid upon importing a pool or onlining a cache device. 
+A value of
+.Sy 0
+disables TRIM on L2ARC altogether and is the default as it can put significant
+stress on the underlying storage devices.
+This will vary depending on how well the specific device handles these commands.
+.
+.It Sy l2arc_noprefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Do not write buffers to L2ARC if they were prefetched but not used by
+applications.
+In case there are prefetched buffers in L2ARC and this option
+is later set, we do not read the prefetched buffers from L2ARC.
+Unsetting this option is useful for caching sequential reads from the
+disks to L2ARC and serving those reads from L2ARC later on.
+This may be beneficial in case the L2ARC device is significantly faster
+in sequential reads than the disks of the pool.
+.Pp
+Use
+.Sy 1
+to disable and
+.Sy 0
+to enable caching/reading prefetches to/from L2ARC.
+.
+.It Sy l2arc_norw Ns = Ns Sy 0 Ns | Ns 1 Pq int
+No reads during writes.
+.
+.It Sy l2arc_write_boost Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq ulong
+Cold L2ARC devices will have
+.Sy l2arc_write_max
+increased by this amount while they remain cold.
+.
+.It Sy l2arc_write_max Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq ulong
+Max write bytes per interval.
+.
+.It Sy l2arc_rebuild_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Rebuild the L2ARC when importing a pool (persistent L2ARC).
+This can be disabled if there are problems importing a pool
+or attaching an L2ARC device (e.g. the L2ARC device is slow
+in reading stored log metadata, or the metadata
+has become somehow fragmented/unusable).
+.
+.It Sy l2arc_rebuild_blocks_min_l2size Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong
+Minimum size of an L2ARC device required in order to write log blocks in it.
+The log blocks are used upon importing the pool to rebuild the persistent L2ARC.
+.Pp
+For L2ARC devices less than 1GB, the amount of data
+.Fn l2arc_evict
+evicts is significant compared to the amount of restored L2ARC data.
+In this case, do not write log blocks in L2ARC in order not to waste space.
+.
+.It Sy metaslab_aliquot Ns = Ns Sy 524288 Ns B Po 512kB Pc Pq ulong
+Metaslab granularity, in bytes.
+This is roughly similar to what would be referred to as the "stripe size"
+in traditional RAID arrays.
+In normal operation, ZFS will try to write this amount of data
+to a top-level vdev before moving on to the next one.
+.
+.It Sy metaslab_bias_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable metaslab group biasing based on their vdevs' over- or under-utilization
+relative to the pool.
+.
+.It Sy metaslab_force_ganging Ns = Ns Sy 16777217 Ns B Po 16MB + 1B Pc Pq ulong
+Make some blocks above a certain size be gang blocks.
+This option is used by the test suite to facilitate testing.
+.
+.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+When attempting to log an output nvlist of an ioctl in the on-disk history,
+the output will not be stored if it is larger than this size (in bytes).
+This must be less than
+.Sy DMU_MAX_ACCESS Pq 64MB .
+This applies primarily to
+.Fn zfs_ioc_channel_program Pq cf. Xr zfs-program 8 .
+.
+.It Sy zfs_keep_log_spacemaps_at_export Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Prevent log spacemaps from being destroyed during pool exports and destroys.
+.
+.It Sy zfs_metaslab_segment_weight_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable/disable segment-based metaslab selection.
+.
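An illustrative aside (an editor's sketch, not part of the patch): the practical effect of the metaslab tunables can be observed with zdb, which dumps per-metaslab space usage; "tank" below is a placeholder pool name:

    # zdb -m tank    # offset, spacemap, and free space of each metaslab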
+.It Sy zfs_metaslab_switch_threshold Ns = Ns Sy 2 Pq int +When using segment-based metaslab selection, continue allocating +from the active metaslab until this option's +worth of buckets have been exhausted. +. +.It Sy metaslab_debug_load Ns = Ns Sy 0 Ns | Ns 1 Pq int +Load all metaslabs during pool import. +. +.It Sy metaslab_debug_unload Ns = Ns Sy 0 Ns | Ns 1 Pq int +Prevent metaslabs from being unloaded. +. +.It Sy metaslab_fragmentation_factor_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int +Enable use of the fragmentation metric in computing metaslab weights. +. +.It Sy metaslab_df_max_search Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int +Maximum distance to search forward from the last offset. +Without this limit, fragmented pools can see +.Em >100`000 +iterations and +.Fn metaslab_block_picker +becomes the performance limiting factor on high-performance storage. +.Pp +With the default setting of +.Sy 16MB , +we typically see less than +.Em 500 +iterations, even with very fragmented +.Sy ashift Ns = Ns Sy 9 +pools. +The maximum number of iterations possible is +.Sy metaslab_df_max_search / 2^(ashift+1) . +With the default setting of +.Sy 16MB +this is +.Em 16*1024 Pq with Sy ashift Ns = Ns Sy 9 +or +.Em 2*1024 Pq with Sy ashift Ns = Ns Sy 12 . +. +.It Sy metaslab_df_use_largest_segment Ns = Ns Sy 0 Ns | Ns 1 Pq int +If not searching forward (due to +.Sy metaslab_df_max_search , metaslab_df_free_pct , +.No or Sy metaslab_df_alloc_threshold ) , +this tunable controls which segment is used. +If set, we will use the largest free segment. +If unset, we will use a segment of at least the requested size. +. +.It Sy zfs_metaslab_max_size_cache_sec Ns = Ns Sy 3600 Ns s Po 1h Pc Pq ulong +When we unload a metaslab, we cache the size of the largest free chunk. +We use that cached size to determine whether or not to load a metaslab +for a given allocation. +As more frees accumulate in that metaslab while it's unloaded, +the cached max size becomes less and less accurate. +After a number of seconds controlled by this tunable, +we stop considering the cached max size and start +considering only the histogram instead. +. +.It Sy zfs_metaslab_mem_limit Ns = Ns Sy 25 Ns % Pq int +When we are loading a new metaslab, we check the amount of memory being used +to store metaslab range trees. +If it is over a threshold, we attempt to unload the least recently used metaslab +to prevent the system from clogging all of its memory with range trees. +This tunable sets the percentage of total system memory that is the threshold. +. +.It Sy zfs_metaslab_try_hard_before_gang Ns = Ns Sy 0 Ns | Ns 1 Pq int +.Bl -item -compact +.It +If unset, we will first try normal allocation. +.It +If that fails then we will do a gang allocation. +.It +If that fails then we will do a "try hard" gang allocation. +.It +If that fails then we will have a multi-layer gang block. +.El +.Pp +.Bl -item -compact +.It +If set, we will first try normal allocation. +.It +If that fails then we will do a "try hard" allocation. +.It +If that fails we will do a gang allocation. +.It +If that fails we will do a "try hard" gang allocation. +.It +If that fails then we will have a multi-layer gang block. +.El +. +.It Sy zfs_metaslab_find_max_tries Ns = Ns Sy 100 Pq int +When not trying hard, we only consider this number of the best metaslabs. +This improves performance, especially when there are many metaslabs per vdev +and the allocation can't actually be satisfied +(so we would otherwise iterate all metaslabs). +. 
+.It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq int
+When a vdev is added, target this number of metaslabs per top-level vdev.
+.
+.It Sy zfs_vdev_default_ms_shift Ns = Ns Sy 29 Po 512MB Pc Pq int
+Default limit for metaslab size.
+.
+.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy ASHIFT_MAX Po 16 Pc Pq ulong
+Maximum ashift used when optimizing for logical -> physical sector size on new
+top-level vdevs.
+.
+.It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq ulong
+Minimum ashift used when creating new top-level vdevs.
+.
+.It Sy zfs_vdev_min_ms_count Ns = Ns Sy 16 Pq int
+Minimum number of metaslabs to create in a top-level vdev.
+.
+.It Sy vdev_validate_skip Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Skip label validation steps during pool import.
+Changing is not recommended unless you know what you're doing
+and are recovering a damaged label.
+.
+.It Sy zfs_vdev_ms_count_limit Ns = Ns Sy 131072 Po 128k Pc Pq int
+Practical upper limit of total metaslabs per top-level vdev.
+.
+.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable metaslab group preloading.
+.
+.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Give more weight to metaslabs with lower LBAs,
+assuming they have greater bandwidth,
+as is typically the case on a modern constant angular velocity disk drive.
+.
+.It Sy metaslab_unload_delay Ns = Ns Sy 32 Pq int
+After a metaslab is used, we keep it loaded for this many TXGs, to attempt to
+reduce unnecessary reloading.
+Note that both this many TXGs and
+.Sy metaslab_unload_delay_ms
+milliseconds must pass before unloading will occur.
+.
+.It Sy metaslab_unload_delay_ms Ns = Ns Sy 600000 Ns ms Po 10min Pc Pq int
+After a metaslab is used, we keep it loaded for this many milliseconds,
+to attempt to reduce unnecessary reloading.
+Note that both this many milliseconds and
+.Sy metaslab_unload_delay
+TXGs must pass before unloading will occur.
+.
+.It Sy reference_history Ns = Ns Sy 3 Pq int
+Maximum number of reference holders being tracked when reference_tracking_enable is active.
+.
+.It Sy reference_tracking_enable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Track reference holders to
+.Sy refcount_t
+objects (debug builds only).
+.
+.It Sy send_holes_without_birth_time Ns = Ns Sy 1 Ns | Ns 0 Pq int
+When set, the
+.Sy hole_birth
+optimization will not be used, and all holes will always be sent during a
+.Nm zfs Cm send .
+This is useful if you suspect your datasets are affected by a bug in
+.Sy hole_birth .
+.
+.It Sy spa_config_path Ns = Ns Pa /etc/zfs/zpool.cache Pq charp
+SPA config file.
+.
+.It Sy spa_asize_inflation Ns = Ns Sy 24 Pq int
+Multiplication factor used to estimate actual disk consumption from the
+size of data being written.
+The default value is a worst-case estimate,
+but lower values may be valid for a given pool depending on its configuration.
+Pool administrators who understand the factors involved
+may wish to specify a more realistic inflation factor,
+particularly if they operate close to quota or capacity limits.
+.
+.It Sy spa_load_print_vdev_tree Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Whether to print the vdev tree in the debugging message buffer during pool import.
+.
+.It Sy spa_load_verify_data Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Whether to traverse data blocks during an "extreme rewind"
+.Pq Fl X
+import.
+.Pp
+An extreme rewind import normally performs a full traversal of all
+blocks in the pool for verification.
+If this parameter is unset, the traversal skips non-metadata blocks.
+It can be toggled once the
+import has started to stop or start the traversal of non-metadata blocks.
+.
+.It Sy spa_load_verify_metadata Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Whether to traverse blocks during an "extreme rewind"
+.Pq Fl X
+pool import.
+.Pp
+An extreme rewind import normally performs a full traversal of all
+blocks in the pool for verification.
+If this parameter is unset, the traversal is not performed.
+It can be toggled once the import has started to stop or start the traversal.
+.
+.It Sy spa_load_verify_shift Ns = Ns Sy 4 Po 1/16th Pc Pq int
+Sets the maximum number of bytes to consume during pool import to the log2
+fraction of the target ARC size.
+.
+.It Sy spa_slop_shift Ns = Ns Sy 5 Po 1/32nd Pc Pq int
+Normally, we don't allow the last
+.Sy 3.2% Pq Sy 1/2^spa_slop_shift
+of space in the pool to be consumed.
+This ensures that we don't run the pool completely out of space,
+due to unaccounted changes (e.g. to the MOS).
+It also limits the worst-case time to allocate space.
+If we have less than this amount of free space,
+most ZPL operations (e.g. write, create) will return
+.Sy ENOSPC .
+.
+.It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int
+During top-level vdev removal, chunks of data are copied from the vdev
+which may include free space in order to trade bandwidth for IOPS.
+This parameter determines the maximum span of free space, in bytes,
+which will be included as "unnecessary" data in a chunk of copied data.
+.Pp
+The default value here was chosen to align with
+.Sy zfs_vdev_read_gap_limit ,
+which is a similar concept when doing
+regular reads (but there's no reason it has to be the same).
+.
+.It Sy vdev_file_logical_ashift Ns = Ns Sy 9 Po 512B Pc Pq ulong
+Logical ashift for file-based devices.
+.
+.It Sy vdev_file_physical_ashift Ns = Ns Sy 9 Po 512B Pc Pq ulong
+Physical ashift for file-based devices.
+.
+.It Sy zap_iterate_prefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int
+If set, when we start iterating over a ZAP object,
+prefetch the entire object (all leaf blocks).
+However, this is limited by
+.Sy dmu_prefetch_max .
+.
+.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+If prefetching is enabled, disable prefetching for reads larger than this size.
+.
+.It Sy zfetch_max_distance Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq uint
+Max bytes to prefetch per stream.
+.
+.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64MB Pc Pq uint
+Max bytes to prefetch indirects for per stream.
+.
+.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
+Max number of streams per zfetch (prefetch streams per file).
+.
+.It Sy zfetch_min_sec_reap Ns = Ns Sy 2 Pq uint
+Min time before an active prefetch stream can be reclaimed.
+.
+.It Sy zfs_abd_scatter_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enables the use of scatter/gather lists for ARC buffers; when disabled, all
+allocations are forced to be linear in kernel memory.
+Disabling can improve performance in some code paths
+at the expense of fragmented kernel memory.
+.
+.It Sy zfs_abd_scatter_max_order Ns = Ns Sy MAX_ORDER-1 Pq uint
+Maximum number of consecutive memory pages allocated in a single block for
+scatter/gather lists.
+.Pp
+The value of
+.Sy MAX_ORDER
+depends on kernel configuration.
+.
+.It Sy zfs_abd_scatter_min_size Ns = Ns Sy 1536 Ns B Po 1.5kB Pc Pq uint
+This is the minimum allocation size that will use scatter (page-based) ABDs.
+Smaller allocations will use linear ABDs.
+.
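An illustrative aside (an editor's sketch, not part of the patch): whether the zfetch_* settings suit a given workload can be gauged from the prefetch kstat on Linux:

    # grep -E '^(hits|misses)' /proc/spl/kstat/zfs/zfetchstats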
+.It Sy zfs_arc_dnode_limit Ns = Ns Sy 0 Ns B Pq ulong
+When the number of bytes consumed by dnodes in the ARC exceeds this number of
+bytes, try to unpin some of it in response to demand for non-metadata.
+This value acts as a ceiling to the amount of dnode metadata, and defaults to
+.Sy 0 ,
+which indicates that a percentage based on
+.Sy zfs_arc_dnode_limit_percent
+of the ARC meta buffers may be used for dnodes.
+.Pp
+Also see
+.Sy zfs_arc_meta_prune
+which serves a similar purpose but is used
+when the amount of metadata in the ARC exceeds
+.Sy zfs_arc_meta_limit
+rather than in response to overall demand for non-metadata.
+.
+.It Sy zfs_arc_dnode_limit_percent Ns = Ns Sy 10 Ns % Pq ulong
+Percentage of ARC meta buffers that can be consumed by dnodes.
+.Pp
+See also
+.Sy zfs_arc_dnode_limit ,
+which serves a similar purpose but has a higher priority if nonzero.
+.
+.It Sy zfs_arc_dnode_reduce_percent Ns = Ns Sy 10 Ns % Pq ulong
+Percentage of ARC dnodes to try to scan in response to demand for non-metadata
+when the number of bytes consumed by dnodes exceeds
+.Sy zfs_arc_dnode_limit .
+.
+.It Sy zfs_arc_average_blocksize Ns = Ns Sy 8192 Ns B Po 8kB Pc Pq int
+The ARC's buffer hash table is sized based on the assumption of an average
+block size of this value.
+This works out to roughly 1MB of hash table per 1GB of physical memory
+with 8-byte pointers.
+For configurations with a known larger average block size,
+this value can be increased to reduce the memory footprint.
+.
+.It Sy zfs_arc_eviction_pct Ns = Ns Sy 200 Ns % Pq int
+When
+.Fn arc_is_overflowing ,
+.Fn arc_get_data_impl
+waits for this percent of the requested amount of data to be evicted.
+For example, by default, for every
+.Em 2kB
+that's evicted,
+.Em 1kB
+of it may be "reused" by a new allocation.
+Since this is above
+.Sy 100 Ns % ,
+it ensures that progress is made towards getting
+.Sy arc_size No under Sy arc_c .
+Since this is finite, it ensures that allocations can still happen,
+even during the potentially long time that
+.Sy arc_size No is more than Sy arc_c .
+.
+.It Sy zfs_arc_evict_batch_limit Ns = Ns Sy 10 Pq int
+Number of ARC headers to evict per sub-list before proceeding to another sub-list.
+This batch-style operation prevents entire sub-lists from being evicted at once
+but comes at a cost of additional unlocking and locking.
+.
+.It Sy zfs_arc_grow_retry Ns = Ns Sy 0 Ns s Pq int
+If set to a non-zero value, it will replace the
+.Sy arc_grow_retry
+value with this value.
+The
+.Sy arc_grow_retry
+.No value Pq default Sy 5 Ns s
+is the number of seconds the ARC will wait before
+trying to resume growth after a memory pressure event.
+.
+.It Sy zfs_arc_lotsfree_percent Ns = Ns Sy 10 Ns % Pq int
+Throttle I/O when free system memory drops below this percentage of total
+system memory.
+Setting this value to
+.Sy 0
+will disable the throttle.
+.
+.It Sy zfs_arc_max Ns = Ns Sy 0 Ns B Pq ulong
+Max size of ARC in bytes.
+If
+.Sy 0 ,
+then the max size of ARC is determined by the amount of system memory installed.
+Under Linux, half of system memory will be used as the limit.
+Under
+.Fx ,
+the larger of
+.Sy all_system_memory - 1GB No and Sy 5/8 * all_system_memory
+will be used as the limit.
+This value must be at least
+.Sy 67108864 Ns B Pq 64MB .
+.Pp
+This value can be changed dynamically, with some caveats.
+It cannot be set back to
+.Sy 0
+while running, and reducing it below the current ARC size will not cause
+the ARC to shrink without memory pressure to induce shrinking.
+.
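+.Pp
+For example (illustrative values, not recommendations; the
+.Pa /etc/modprobe.d
+location is a common convention and may vary by distribution):
+.Bd -literal -compact
+# Cap the ARC at 4GB at runtime; note the caveats above.
+echo 4294967296 > /sys/module/zfs/parameters/zfs_arc_max
+# Persist the setting across module reloads.
+echo "options zfs zfs_arc_max=4294967296" >> /etc/modprobe.d/zfs.conf
+.Ed
+.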
+.It Sy zfs_arc_meta_adjust_restarts Ns = Ns Sy 4096 Pq ulong
+The number of restart passes to make while scanning the ARC, attempting
+to free buffers in order to stay below the
+.Sy zfs_arc_meta_limit .
+This value should not need to be tuned but is available to facilitate
+performance analysis.
+.
+.It Sy zfs_arc_meta_limit Ns = Ns Sy 0 Ns B Pq ulong
+The maximum size in bytes that metadata buffers are allowed to
+consume in the ARC.
+When this limit is reached, metadata buffers will be reclaimed,
+even if the overall
+.Sy arc_c_max
+has not been reached.
+It defaults to
+.Sy 0 ,
+which indicates that a percentage based on
+.Sy zfs_arc_meta_limit_percent
+of the ARC may be used for metadata.
+.Pp
+This value may be changed dynamically, except that it must be set to an explicit value
+.Pq cannot be set back to Sy 0 .
+.
+.It Sy zfs_arc_meta_limit_percent Ns = Ns Sy 75 Ns % Pq ulong
+Percentage of ARC buffers that can be used for metadata.
+.Pp
+See also
+.Sy zfs_arc_meta_limit ,
+which serves a similar purpose but has a higher priority if nonzero.
+.
+.It Sy zfs_arc_meta_min Ns = Ns Sy 0 Ns B Pq ulong
+The minimum allowed size in bytes that metadata buffers may consume in
+the ARC.
+.
+.It Sy zfs_arc_meta_prune Ns = Ns Sy 10000 Pq int
+The number of dentries and inodes to be scanned looking for entries
+which can be dropped.
+This may be required when the ARC reaches the
+.Sy zfs_arc_meta_limit
+because dentries and inodes can pin buffers in the ARC.
+Increasing this value will cause the dentry and inode caches
+to be pruned more aggressively.
+Setting this value to
+.Sy 0
+will disable pruning the inode and dentry caches.
+.
+.It Sy zfs_arc_meta_strategy Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Define the strategy for ARC metadata buffer eviction (meta reclaim strategy):
+.Bl -tag -compact -offset 4n -width "0 (META_ONLY)"
+.It Sy 0 Pq META_ONLY
+evict only the ARC metadata buffers
+.It Sy 1 Pq BALANCED
+additional data buffers may be evicted if necessary
+to evict the required number of metadata buffers.
+.El
+.
+.It Sy zfs_arc_min Ns = Ns Sy 0 Ns B Pq ulong
+Min size of ARC in bytes.
+.No If set to Sy 0 , arc_c_min
+will default to consuming the larger of
+.Sy 32MB No or Sy all_system_memory/32 .
+.
+.It Sy zfs_arc_min_prefetch_ms Ns = Ns Sy 0 Ns ms Ns Po Ns ≡ Ns 1s Pc Pq int
+Minimum time prefetched blocks are locked in the ARC.
+.
+.It Sy zfs_arc_min_prescient_prefetch_ms Ns = Ns Sy 0 Ns ms Ns Po Ns ≡ Ns 6s Pc Pq int
+Minimum time "prescient prefetched" blocks are locked in the ARC.
+These blocks are meant to be prefetched fairly aggressively ahead of
+the code that may use them.
+.
+.It Sy zfs_max_missing_tvds Ns = Ns Sy 0 Pq int
+Number of missing top-level vdevs which will be allowed during
+pool import (only in read-only mode).
+.
+.It Sy zfs_max_nvlist_src_size Ns = Ns Sy 0 Pq ulong
+Maximum size in bytes allowed to be passed as
+.Sy zc_nvlist_src_size
+for ioctls on
+.Pa /dev/zfs .
+This prevents a user from causing the kernel to allocate
+an excessive amount of memory.
+When the limit is exceeded, the ioctl fails with
+.Sy EINVAL
+and a description of the error is sent to the
+.Pa zfs-dbgmsg
+log.
+This parameter should not need to be touched under normal circumstances.
+If
+.Sy 0 ,
+equivalent to a quarter of the user-wired memory limit under
+.Fx
+and to
+.Sy 134217728 Ns B Pq 128MB
+under Linux.
+.
+.It Sy zfs_multilist_num_sublists Ns = Ns Sy 0 Pq int
+To allow more fine-grained locking, each ARC state contains a series
+of lists for both data and metadata objects.
+Locking is performed at the level of these "sub-lists".
+This parameter controls the number of sub-lists per ARC state,
+and also applies to other uses of the multilist data structure.
+.Pp
+If
+.Sy 0 ,
+equivalent to the greater of the number of online CPUs and
+.Sy 4 .
+.
+.It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int
+The ARC size is considered to be overflowing if it exceeds the current
+ARC target size
+.Pq Sy arc_c
+by a threshold determined by this parameter.
+The threshold is calculated as a fraction of
+.Sy arc_c
+using the formula
+.Sy arc_c >> zfs_arc_overflow_shift .
+.Pp
+The default value of
+.Sy 8
+causes the ARC to be considered overflowing if it exceeds the target size by
+.Em 1/256th Pq Em 0.3%
+of the target size.
+.Pp
+When the ARC is overflowing, new buffer allocations are stalled until
+the reclaim thread catches up and the overflow condition no longer exists.
+.
+.It Sy zfs_arc_p_min_shift Ns = Ns Sy 0 Pq int
+If nonzero, this will update
+.Sy arc_p_min_shift Pq default Sy 4
+with the new value.
+.Sy arc_p_min_shift No is used as a shift of Sy arc_c
+when calculating the minimum
+.Sy arc_p No size.
+.
+.It Sy zfs_arc_p_dampener_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Disable
+.Sy arc_p
+adapt dampener, which reduces the maximum single adjustment to
+.Sy arc_p .
+.
+.It Sy zfs_arc_shrink_shift Ns = Ns Sy 0 Pq int
+If nonzero, this will update
+.Sy arc_shrink_shift Pq default Sy 7
+with the new value.
+.
+.It Sy zfs_arc_pc_percent Ns = Ns Sy 0 Ns % Po off Pc Pq uint
+Percent of pagecache to reclaim ARC to.
+.Pp
+This tunable allows the ZFS ARC to play more nicely
+with the kernel's LRU pagecache.
+It can guarantee that the ARC size won't collapse under scanning
+pressure on the pagecache, yet still allows the ARC to be reclaimed down to
+.Sy zfs_arc_min
+if necessary.
+This value is specified as percent of pagecache size (as measured by
+.Sy NR_FILE_PAGES ) ,
+where that percent may exceed
+.Sy 100 .
+This only operates during memory pressure/reclaim.
+.
+.It Sy zfs_arc_shrinker_limit Ns = Ns Sy 10000 Pq int
+This is a limit on how many pages the ARC shrinker makes available for
+eviction in response to one page allocation attempt.
+Note that in practice, the kernel's shrinker can ask us to evict
+up to about four times this for one allocation attempt.
+.Pp
+The default limit of
+.Sy 10000 Pq in practice, Em 160MB No per allocation attempt with 4kB pages
+limits the amount of time spent attempting to reclaim ARC memory to
+less than 100ms per allocation attempt,
+even with a small average compressed block size of ~8kB.
+.Pp
+The parameter can be set to 0 (zero) to disable the limit,
+and only applies on Linux.
+.
+.It Sy zfs_arc_sys_free Ns = Ns Sy 0 Ns B Pq ulong
+The target number of bytes the ARC should leave as free memory on the system.
+If zero, equivalent to the bigger of
+.Sy 512kB No and Sy all_system_memory/64 .
+.
+.It Sy zfs_autoimport_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Disable pool import at module load by ignoring the cache file
+.Pq Sy spa_config_path .
+.
+.It Sy zfs_checksum_events_per_second Ns = Ns Sy 20 Ns /s Pq uint
+Rate limit checksum events to this many per second.
+Note that this should not be set below the ZED thresholds
+(currently 10 checksums over 10 seconds)
+or else the daemon may not trigger any action.
+.
+.It Sy zfs_commit_timeout_pct Ns = Ns Sy 5 Ns % Pq int
+This controls the amount of time that a ZIL block (lwb) will remain "open"
+when it isn't "full", and it has a thread waiting for it to be committed to
+stable storage.
+The timeout is scaled based on a percentage of the last lwb +latency to avoid significantly impacting the latency of each individual +transaction record (itx). +. +.It Sy zfs_condense_indirect_commit_entry_delay_ms Ns = Ns Sy 0 Ns ms Pq int +Vdev indirection layer (used for device removal) sleeps for this many +milliseconds during mapping generation. +Intended for use with the test suite to throttle vdev removal speed. +. +.It Sy zfs_condense_indirect_obsolete_pct Ns = Ns Sy 25 Ns % Pq int +Minimum percent of obsolete bytes in vdev mapping required to attempt to condense +.Pq see Sy zfs_condense_indirect_vdevs_enable . +Intended for use with the test suite +to facilitate triggering condensing as needed. +. +.It Sy zfs_condense_indirect_vdevs_enable Ns = Ns Sy 1 Ns | Ns 0 Pq int +Enable condensing indirect vdev mappings. +When set, attempt to condense indirect vdev mappings +if the mapping uses more than +.Sy zfs_condense_min_mapping_bytes +bytes of memory and if the obsolete space map object uses more than +.Sy zfs_condense_max_obsolete_bytes +bytes on-disk. +The condensing process is an attempt to save memory by removing obsolete mappings. +. +.It Sy zfs_condense_max_obsolete_bytes Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong +Only attempt to condense indirect vdev mappings if the on-disk size +of the obsolete space map object is greater than this number of bytes +.Pq see Sy zfs_condense_indirect_vdevs_enable . +. +.It Sy zfs_condense_min_mapping_bytes Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq ulong +Minimum size vdev mapping to attempt to condense +.Pq see Sy zfs_condense_indirect_vdevs_enable . +. +.It Sy zfs_dbgmsg_enable Ns = Ns Sy 1 Ns | Ns 0 Pq int +Internally ZFS keeps a small log to facilitate debugging. +The log is enabled by default, and can be disabled by unsetting this option. +The contents of the log can be accessed by reading +.Pa /proc/spl/kstat/zfs/dbgmsg . +Writing +.Sy 0 +to the file clears the log. +.Pp +This setting does not influence debug prints due to +.Sy zfs_flags . +. +.It Sy zfs_dbgmsg_maxsize Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int +Maximum size of the internal ZFS debug log. +. +.It Sy zfs_dbuf_state_index Ns = Ns Sy 0 Pq int +Historically used for controlling what reporting was available under +.Pa /proc/spl/kstat/zfs . +No effect. +. +.It Sy zfs_deadman_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int +When a pool sync operation takes longer than +.Sy zfs_deadman_synctime_ms , +or when an individual I/O operation takes longer than +.Sy zfs_deadman_ziotime_ms , +then the operation is considered to be "hung". +If +.Sy zfs_deadman_enabled +is set, then the deadman behavior is invoked as described by +.Sy zfs_deadman_failmode . +By default, the deadman is enabled and set to +.Sy wait +which results in "hung" I/Os only being logged. +The deadman is automatically disabled when a pool gets suspended. +. +.It Sy zfs_deadman_failmode Ns = Ns Sy wait Pq charp +Controls the failure behavior when the deadman detects a "hung" I/O operation. +Valid values are: +.Bl -tag -compact -offset 4n -width "continue" +.It Sy wait +Wait for a "hung" operation to complete. +For each "hung" operation a "deadman" event will be posted +describing that operation. +.It Sy continue +Attempt to recover from a "hung" operation by re-dispatching it +to the I/O pipeline if possible. +.It Sy panic +Panic the system. +This can be used to facilitate automatic fail-over +to a properly configured fail-over partner. +.El +. 
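+.Pp
+For example (an illustrative session; switching modes on a production pool
+should be considered carefully):
+.Bd -literal -compact
+# Show the current failure behavior.
+cat /sys/module/zfs/parameters/zfs_deadman_failmode
+# Re-dispatch "hung" I/O operations instead of merely logging them.
+echo continue > /sys/module/zfs/parameters/zfs_deadman_failmode
+.Ed
+.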
+.It Sy zfs_deadman_checktime_ms Ns = Ns Sy 60000 Ns ms Po 1min Pc Pq int +Check time in milliseconds. +This defines the frequency at which we check for hung I/O requests +and potentially invoke the +.Sy zfs_deadman_failmode +behavior. +. +.It Sy zfs_deadman_synctime_ms Ns = Ns Sy 600000 Ns ms Po 10min Pc Pq ulong +Interval in milliseconds after which the deadman is triggered and also +the interval after which a pool sync operation is considered to be "hung". +Once this limit is exceeded the deadman will be invoked every +.Sy zfs_deadman_checktime_ms +milliseconds until the pool sync completes. +. +.It Sy zfs_deadman_ziotime_ms Ns = Ns Sy 300000 Ns ms Po 5min Pc Pq ulong +Interval in milliseconds after which the deadman is triggered and an +individual I/O operation is considered to be "hung". +As long as the operation remains "hung", +the deadman will be invoked every +.Sy zfs_deadman_checktime_ms +milliseconds until the operation completes. +. +.It Sy zfs_dedup_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int +Enable prefetching dedup-ed blocks which are going to be freed. +. +.It Sy zfs_delay_min_dirty_percent Ns = Ns Sy 60 Ns % Pq int +Start to delay each transaction once there is this amount of dirty data, +expressed as a percentage of +.Sy zfs_dirty_data_max . +This value should be at least +.Sy zfs_vdev_async_write_active_max_dirty_percent . +.No See Sx ZFS TRANSACTION DELAY . +. +.It Sy zfs_delay_scale Ns = Ns Sy 500000 Pq int +This controls how quickly the transaction delay approaches infinity. +Larger values cause longer delays for a given amount of dirty data. +.Pp +For the smoothest delay, this value should be about 1 billion divided +by the maximum number of operations per second. +This will smoothly handle between ten times and a tenth of this number. +.No See Sx ZFS TRANSACTION DELAY . +.Pp +.Sy zfs_delay_scale * zfs_dirty_data_max Em must be smaller than Sy 2^64 . +. +.It Sy zfs_disable_ivset_guid_check Ns = Ns Sy 0 Ns | Ns 1 Pq int +Disables requirement for IVset GUIDs to be present and match when doing a raw +receive of encrypted datasets. +Intended for users whose pools were created with +OpenZFS pre-release versions and now have compatibility issues. +. +.It Sy zfs_key_max_salt_uses Ns = Ns Sy 400000000 Po 4*10^8 Pc Pq ulong +Maximum number of uses of a single salt value before generating a new one for +encrypted datasets. +The default value is also the maximum. +. +.It Sy zfs_object_mutex_size Ns = Ns Sy 64 Pq uint +Size of the znode hashtable used for holds. +.Pp +Due to the need to hold locks on objects that may not exist yet, kernel mutexes +are not created per-object and instead a hashtable is used where collisions +will result in objects waiting when there is not actually contention on the +same object. +. +.It Sy zfs_slow_io_events_per_second Ns = Ns Sy 20 Ns /s Pq int +Rate limit delay and deadman zevents (which report slow I/Os) to this many per +second. +. +.It Sy zfs_unflushed_max_mem_amt Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong +Upper-bound limit for unflushed metadata changes to be held by the +log spacemap in memory, in bytes. +. +.It Sy zfs_unflushed_max_mem_ppm Ns = Ns Sy 1000 Ns ppm Po 0.1% Pc Pq ulong +Part of overall system memory that ZFS allows to be used +for unflushed metadata changes by the log spacemap, in millionths. +. +.It Sy zfs_unflushed_log_block_max Ns = Ns Sy 262144 Po 256k Pc Pq ulong +Describes the maximum number of log spacemap blocks allowed for each pool. 
+The default value means that the space in all the log spacemaps
+can add up to no more than
+.Sy 262144
+blocks (which means
+.Em 32GB
+of logical space before compression and ditto blocks,
+assuming that blocksize is
+.Em 128kB ) .
+.Pp
+This tunable is important because it involves a trade-off between import
+time after an unclean export and the frequency of flushing metaslabs.
+The higher this number is, the more log blocks we allow when the pool is
+active, which means that we flush metaslabs less often and thus decrease
+the number of I/Os for spacemap updates per TXG.
+At the same time though, that means that in the event of an unclean export,
+there will be more log spacemap blocks for us to read, inducing overhead
+in the import time of the pool.
+The lower the number, the more flushing occurs, destroying log
+blocks quicker as they become obsolete faster, which leaves fewer blocks
+to be read during import time after a crash.
+.Pp
+Each log spacemap block existing during pool import leads to approximately
+one extra logical I/O issued.
+This is the reason why this tunable is exposed in terms of blocks rather
+than space used.
+.
+.It Sy zfs_unflushed_log_block_min Ns = Ns Sy 1000 Pq ulong
+If the number of metaslabs is small and our incoming rate is high,
+we could get into a situation in which we are flushing all our metaslabs every TXG.
+Thus we always allow at least this many log blocks.
+.
+.It Sy zfs_unflushed_log_block_pct Ns = Ns Sy 400 Ns % Pq ulong
+Tunable used to determine the number of blocks that can be used for
+the spacemap log, expressed as a percentage of the total number of
+metaslabs in the pool.
+.
+.It Sy zfs_unlink_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When enabled, files will not be asynchronously removed from the list of pending
+unlinks and the space they consume will be leaked.
+Once this option has been disabled and the dataset is remounted,
+the pending unlinks will be processed and the freed space returned to the pool.
+This option is used by the test suite.
+.
+.It Sy zfs_delete_blocks Ns = Ns Sy 20480 Pq ulong
+This is used to define a large file for the purposes of deletion.
+Files containing more than
+.Sy zfs_delete_blocks
+blocks will be deleted asynchronously, while smaller files are deleted synchronously.
+Decreasing this value will reduce the time spent in an
+.Xr unlink 2
+system call, at the expense of a longer delay before the freed space is available.
+.
+.It Sy zfs_dirty_data_max Ns = Pq int
+Determines the dirty space limit in bytes.
+Once this limit is exceeded, new writes are halted until space frees up.
+This parameter takes precedence over
+.Sy zfs_dirty_data_max_percent .
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+Defaults to
+.Sy physical_ram/10 ,
+capped at
+.Sy zfs_dirty_data_max_max .
+.
+.It Sy zfs_dirty_data_max_max Ns = Pq int
+Maximum allowable value of
+.Sy zfs_dirty_data_max ,
+expressed in bytes.
+This limit is only enforced at module load time, and will be ignored if
+.Sy zfs_dirty_data_max
+is later changed.
+This parameter takes precedence over
+.Sy zfs_dirty_data_max_max_percent .
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+Defaults to
+.Sy physical_ram/4 .
+.
+.It Sy zfs_dirty_data_max_max_percent Ns = Ns Sy 25 Ns % Pq int
+Maximum allowable value of
+.Sy zfs_dirty_data_max ,
+expressed as a percentage of physical RAM.
+This limit is only enforced at module load time, and will be ignored if
+.Sy zfs_dirty_data_max
+is later changed.
+The parameter
+.Sy zfs_dirty_data_max_max
+takes precedence over this one.
+.No See Sx ZFS TRANSACTION DELAY .
+.
+.It Sy zfs_dirty_data_max_percent Ns = Ns Sy 10 Ns % Pq int
+Determines the dirty space limit, expressed as a percentage of all memory.
+Once this limit is exceeded, new writes are halted until space frees up.
+The parameter
+.Sy zfs_dirty_data_max
+takes precedence over this one.
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+Subject to
+.Sy zfs_dirty_data_max_max .
+.
+.It Sy zfs_dirty_data_sync_percent Ns = Ns Sy 20 Ns % Pq int
+Start syncing out a transaction group if there's at least this much dirty data
+.Pq as a percentage of Sy zfs_dirty_data_max .
+This should be less than
+.Sy zfs_vdev_async_write_active_min_dirty_percent .
+.
+.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
+Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
+preallocated for a file in order to guarantee that later writes will not
+run out of space.
+Instead,
+.Xr fallocate 2
+space preallocation only checks that sufficient space is currently available
+in the pool or the user's project quota allocation,
+and then creates a sparse file of the requested size.
+The requested space is multiplied by
+.Sy zfs_fallocate_reserve_percent
+to allow additional space for indirect blocks and other internal metadata.
+Setting this to
+.Sy 0
+disables support for
+.Xr fallocate 2
+and causes it to return
+.Sy EOPNOTSUPP .
+.
+.It Sy zfs_fletcher_4_impl Ns = Ns Sy fastest Pq string
+Select a fletcher 4 implementation.
+.Pp
+Supported selectors are:
+.Sy fastest , scalar , sse2 , ssse3 , avx2 , avx512f , avx512bw ,
+.No and Sy aarch64_neon .
+All except
+.Sy fastest No and Sy scalar
+require instruction set extensions to be available,
+and will only appear if ZFS detects that they are present at runtime.
+If multiple implementations of fletcher 4 are available, the
+.Sy fastest
+will be chosen using a micro benchmark.
+Selecting
+.Sy scalar
+results in the original CPU-based calculation being used.
+Selecting any option other than
+.Sy fastest No or Sy scalar
+results in vector instructions
+from the respective CPU instruction set being used.
+.
+.It Sy zfs_free_bpobj_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable/disable the processing of the free_bpobj object.
+.
+.It Sy zfs_async_block_max_blocks Ns = Ns Sy ULONG_MAX Po unlimited Pc Pq ulong
+Maximum number of blocks freed in a single TXG.
+.
+.It Sy zfs_max_async_dedup_frees Ns = Ns Sy 100000 Po 10^5 Pc Pq ulong
+Maximum number of dedup blocks freed in a single TXG.
+.
+.It Sy zfs_vdev_async_read_max_active Ns = Ns Sy 3 Pq int
+Maximum asynchronous read I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_read_min_active Ns = Ns Sy 1 Pq int
+Minimum asynchronous read I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_write_active_max_dirty_percent Ns = Ns Sy 60 Ns % Pq int
+When the pool has more than this much dirty data, use
+.Sy zfs_vdev_async_write_max_active
+to limit active async writes.
+If the dirty data is between the minimum and maximum,
+the active I/O limit is linearly interpolated.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_write_active_min_dirty_percent Ns = Ns Sy 30 Ns % Pq int
+When the pool has less than this much dirty data, use
+.Sy zfs_vdev_async_write_min_active
+to limit active async writes.
+If the dirty data is between the minimum and maximum, +the active I/O limit is linearly +interpolated. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_async_write_max_active Ns = Ns Sy 30 Pq int +Maximum asynchronous write I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_async_write_min_active Ns = Ns Sy 2 Pq int +Minimum asynchronous write I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +.Pp +Lower values are associated with better latency on rotational media but poorer +resilver performance. +The default value of +.Sy 2 +was chosen as a compromise. +A value of +.Sy 3 +has been shown to improve resilver performance further at a cost of +further increasing latency. +. +.It Sy zfs_vdev_initializing_max_active Ns = Ns Sy 1 Pq int +Maximum initializing I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_initializing_min_active Ns = Ns Sy 1 Pq int +Minimum initializing I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_max_active Ns = Ns Sy 1000 Pq int +The maximum number of I/O operations active to each device. +Ideally, this will be at least the sum of each queue's +.Sy max_active . +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_rebuild_max_active Ns = Ns Sy 3 Pq int +Maximum sequential resilver I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_rebuild_min_active Ns = Ns Sy 1 Pq int +Minimum sequential resilver I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_removal_max_active Ns = Ns Sy 2 Pq int +Maximum removal I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_removal_min_active Ns = Ns Sy 1 Pq int +Minimum removal I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_scrub_max_active Ns = Ns Sy 2 Pq int +Maximum scrub I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_scrub_min_active Ns = Ns Sy 1 Pq int +Minimum scrub I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_sync_read_max_active Ns = Ns Sy 10 Pq int +Maximum synchronous read I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_sync_read_min_active Ns = Ns Sy 10 Pq int +Minimum synchronous read I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_sync_write_max_active Ns = Ns Sy 10 Pq int +Maximum synchronous write I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_sync_write_min_active Ns = Ns Sy 10 Pq int +Minimum synchronous write I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_trim_max_active Ns = Ns Sy 2 Pq int +Maximum trim/discard I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_trim_min_active Ns = Ns Sy 1 Pq int +Minimum trim/discard I/O operations active to each device. +.No See Sx ZFS I/O SCHEDULER . +. +.It Sy zfs_vdev_nia_delay Ns = Ns Sy 5 Pq int +For non-interactive I/O (scrub, resilver, removal, initialize and rebuild), +the number of concurrently-active I/O operations is limited to +.Sy zfs_*_min_active , +unless the vdev is "idle". 
+When there are no interactive I/O operations active (synchronous or otherwise),
+and
+.Sy zfs_vdev_nia_delay
+operations have completed since the last interactive operation,
+then the vdev is considered to be "idle",
+and the number of concurrently-active non-interactive operations is increased to
+.Sy zfs_*_max_active .
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_nia_credit Ns = Ns Sy 5 Pq int
+Some HDDs tend to prioritize sequential I/O so strongly that concurrent
+random I/O latency reaches several seconds.
+On some HDDs this happens even if sequential I/O operations
+are submitted one at a time, and so setting
+.Sy zfs_*_max_active Ns = Sy 1
+does not help.
+To prevent non-interactive I/O, like scrub,
+from monopolizing the device, no more than
+.Sy zfs_vdev_nia_credit
+operations can be sent
+while there are outstanding incomplete interactive operations.
+This enforced wait ensures the HDD services the interactive I/O
+within a reasonable amount of time.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_queue_depth_pct Ns = Ns Sy 1000 Ns % Pq int
+Maximum number of queued allocations per top-level vdev expressed as
+a percentage of
+.Sy zfs_vdev_async_write_max_active ,
+which allows the system to detect devices that are more capable
+of handling allocations and to allocate more blocks to those devices.
+This allows for dynamic allocation distribution when devices are imbalanced,
+as fuller devices will tend to be slower than empty devices.
+.Pp
+Also see
+.Sy zio_dva_throttle_enabled .
+.
+.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
+Time before expiring
+.Pa .zfs/snapshot .
+.
+.It Sy zfs_admin_snapshot Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow the creation, removal, or renaming of entries in the
+.Sy .zfs/snapshot
+directory to cause the creation, destruction, or renaming of snapshots.
+When enabled, this functionality works both locally and over NFS exports
+which have the
+.Em no_root_squash
+option set.
+.
+.It Sy zfs_flags Ns = Ns Sy 0 Pq int
+Set additional debugging flags.
+The following flags may be bitwise-ored together:
+.TS
+box;
+lbz r l l .
+	Value	Symbolic Name	Description
+_
+	1	ZFS_DEBUG_DPRINTF	Enable dprintf entries in the debug log.
+*	2	ZFS_DEBUG_DBUF_VERIFY	Enable extra dbuf verifications.
+*	4	ZFS_DEBUG_DNODE_VERIFY	Enable extra dnode verifications.
+	8	ZFS_DEBUG_SNAPNAMES	Enable snapshot name verification.
+	16	ZFS_DEBUG_MODIFY	Check for illegally modified ARC buffers.
+	64	ZFS_DEBUG_ZIO_FREE	Enable verification of block frees.
+	128	ZFS_DEBUG_HISTOGRAM_VERIFY	Enable extra spacemap histogram verifications.
+	256	ZFS_DEBUG_METASLAB_VERIFY	Verify space accounting on disk matches in-memory \fBrange_trees\fP.
+	512	ZFS_DEBUG_SET_ERROR	Enable \fBSET_ERROR\fP and dprintf entries in the debug log.
+	1024	ZFS_DEBUG_INDIRECT_REMAP	Verify split blocks created by device removal.
+	2048	ZFS_DEBUG_TRIM	Verify TRIM ranges are always within the allocatable range tree.
+	4096	ZFS_DEBUG_LOG_SPACEMAP	Verify that the log summary is consistent with the spacemap log
+			and enable \fBzfs_dbgmsgs\fP for metaslab loading and flushing.
+.TE
+.Sy \& * No Requires debug build.
+.
+.It Sy zfs_free_leak_on_eio Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If destroy encounters an
+.Sy EIO
+while reading metadata (e.g. indirect blocks),
+space referenced by the missing metadata cannot be freed.
+Normally this causes the background destroy to become "stalled",
+as it is unable to make forward progress.
+While in this stalled state, all remaining space to free
+from the error-encountering filesystem is "temporarily leaked".
+Set this flag to cause it to ignore the
+.Sy EIO ,
+permanently leak the space from indirect blocks that cannot be read,
+and continue to free everything else that it can.
+.Pp
+The default "stalling" behavior is useful if the storage partially
+fails (i.e. some but not all I/O operations fail), and then later recovers.
+In this case, we will be able to continue pool operations while it is
+partially failed, and when it recovers, we can continue to free the
+space, with no leaks.
+Note, however, that this case is actually fairly rare.
+.Pp
+Typically pools either
+.Bl -enum -compact -offset 4n -width "1."
+.It
+fail completely (but perhaps temporarily,
+e.g. due to a top-level vdev going offline), or
+.It
+have localized, permanent errors (e.g. disk returns the wrong data
+due to bit flip or firmware bug).
+.El
+In the former case, this setting does not matter because the
+pool will be suspended and the sync thread will not be able to make
+forward progress regardless.
+In the latter, because the error is permanent, the best we can do
+is leak the minimum amount of space,
+which is what setting this flag will do.
+It is therefore reasonable for this flag to normally be set,
+but we chose the more conservative approach of not setting it,
+so that there is no possibility of
+leaking space in the "partial temporary" failure case.
+.
+.It Sy zfs_free_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq int
+During a
+.Nm zfs Cm destroy
+operation using the
+.Sy async_destroy
+feature,
+a minimum of this much time will be spent working on freeing blocks per TXG.
+.
+.It Sy zfs_obsolete_min_time_ms Ns = Ns Sy 500 Ns ms Pq int
+Similar to
+.Sy zfs_free_min_time_ms ,
+but for cleanup of old indirection records for removed vdevs.
+.
+.It Sy zfs_immediate_write_sz Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq long
+Largest data block to write to the ZIL.
+Larger blocks will be treated as if the dataset being written to had the
+.Sy logbias Ns = Ns Sy throughput
+property set.
+.
+.It Sy zfs_initialize_value Ns = Ns Sy 16045690984833335022 Po 0xDEADBEEFDEADBEEE Pc Pq ulong
+Pattern written to vdev free space by
+.Xr zpool-initialize 8 .
+.
+.It Sy zfs_initialize_chunk_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+Size of writes used by
+.Xr zpool-initialize 8 .
+This option is used by the test suite.
+.
+.It Sy zfs_livelist_max_entries Ns = Ns Sy 500000 Po 5*10^5 Pc Pq ulong
+The threshold size (in block pointers) at which we create a new sub-livelist.
+Larger sublists are more costly from a memory perspective, but the fewer
+sublists there are, the lower the cost of insertion.
+.
+.It Sy zfs_livelist_min_percent_shared Ns = Ns Sy 75 Ns % Pq int
+If the amount of shared space between a snapshot and its clone drops below
+this threshold, the clone turns off the livelist and reverts to the old
+deletion method.
+This is in place because livelists no longer give us a benefit
+once a clone has been overwritten enough.
+.
+.It Sy zfs_livelist_condense_new_alloc Ns = Ns Sy 0 Pq int
+Incremented each time an extra ALLOC blkptr is added to a livelist entry while
+it is being condensed.
+This option is used by the test suite to track race conditions.
+.
+.It Sy zfs_livelist_condense_sync_cancel Ns = Ns Sy 0 Pq int
+Incremented each time livelist condensing is canceled while in
+.Fn spa_livelist_condense_sync .
+This option is used by the test suite to track race conditions.
+.
+.It Sy zfs_livelist_condense_sync_pause Ns = Ns Sy 0 Ns | Ns 1 Pq int +When set, the livelist condense process pauses indefinitely before +executing the synctask - +.Fn spa_livelist_condense_sync . +This option is used by the test suite to trigger race conditions. +. +.It Sy zfs_livelist_condense_zthr_cancel Ns = Ns Sy 0 Pq int +Incremented each time livelist condensing is canceled while in +.Fn spa_livelist_condense_cb . +This option is used by the test suite to track race conditions. +. +.It Sy zfs_livelist_condense_zthr_pause Ns = Ns Sy 0 Ns | Ns 1 Pq int +When set, the livelist condense process pauses indefinitely before +executing the open context condensing work in +.Fn spa_livelist_condense_cb . +This option is used by the test suite to trigger race conditions. +. +.It Sy zfs_lua_max_instrlimit Ns = Ns Sy 100000000 Po 10^8 Pc Pq ulong +The maximum execution time limit that can be set for a ZFS channel program, +specified as a number of Lua instructions. +. +.It Sy zfs_lua_max_memlimit Ns = Ns Sy 104857600 Po 100MB Pc Pq ulong +The maximum memory limit that can be set for a ZFS channel program, specified +in bytes. +. +.It Sy zfs_max_dataset_nesting Ns = Ns Sy 50 Pq int +The maximum depth of nested datasets. +This value can be tuned temporarily to +fix existing datasets that exceed the predefined limit. +. +.It Sy zfs_max_log_walking Ns = Ns Sy 5 Pq ulong +The number of past TXGs that the flushing algorithm of the log spacemap +feature uses to estimate incoming log blocks. +. +.It Sy zfs_max_logsm_summary_length Ns = Ns Sy 10 Pq ulong +Maximum number of rows allowed in the summary of the spacemap log. +. +.It Sy zfs_max_recordsize Ns = Ns Sy 1048576 Po 1MB Pc Pq int +We currently support block sizes from +.Em 512B No to Em 16MB . +The benefits of larger blocks, and thus larger I/O, +need to be weighed against the cost of COWing a giant block to modify one byte. +Additionally, very large blocks can have an impact on I/O latency, +and also potentially on the memory allocator. +Therefore, we do not allow the recordsize to be set larger than this tunable. +Larger blocks can be created by changing it, +and pools with larger blocks can always be imported and used, +regardless of this setting. +. +.It Sy zfs_allow_redacted_dataset_mount Ns = Ns Sy 0 Ns | Ns 1 Pq int +Allow datasets received with redacted send/receive to be mounted. +Normally disabled because these datasets may be missing key data. +. +.It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq ulong +Minimum number of metaslabs to flush per dirty TXG. +. +.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq int +Allow metaslabs to keep their active state as long as their fragmentation +percentage is no more than this value. +An active metaslab that exceeds this threshold +will no longer keep its active status allowing better metaslabs to be selected. +. +.It Sy zfs_mg_fragmentation_threshold Ns = Ns Sy 95 Ns % Pq int +Metaslab groups are considered eligible for allocations if their +fragmentation metric (measured as a percentage) is less than or equal to +this value. +If a metaslab group exceeds this threshold then it will be +skipped unless all metaslab groups within the metaslab class have also +crossed this threshold. +. +.It Sy zfs_mg_noalloc_threshold Ns = Ns Sy 0 Ns % Pq int +Defines a threshold at which metaslab groups should be eligible for allocations. +The value is expressed as a percentage of free space +beyond which a metaslab group is always eligible for allocations. 
+If a metaslab group's free space is less than or equal to the +threshold, the allocator will avoid allocating to that group +unless all groups in the pool have reached the threshold. +Once all groups have reached the threshold, all groups are allowed to accept +allocations. +The default value of +.Sy 0 +disables the feature and causes all metaslab groups to be eligible for allocations. +.Pp +This parameter allows one to deal with pools having heavily imbalanced +vdevs such as would be the case when a new vdev has been added. +Setting the threshold to a non-zero percentage will stop allocations +from being made to vdevs that aren't filled to the specified percentage +and allow lesser filled vdevs to acquire more allocations than they +otherwise would under the old +.Sy zfs_mg_alloc_failures +facility. +. +.It Sy zfs_ddt_data_is_special Ns = Ns Sy 1 Ns | Ns 0 Pq int +If enabled, ZFS will place DDT data into the special allocation class. +. +.It Sy zfs_user_indirect_is_special Ns = Ns Sy 1 Ns | Ns 0 Pq int +If enabled, ZFS will place user data indirect blocks +into the special allocation class. +. +.It Sy zfs_multihost_history Ns = Ns Sy 0 Pq int +Historical statistics for this many latest multihost updates will be available in +.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /multihost . +. +.It Sy zfs_multihost_interval Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq ulong +Used to control the frequency of multihost writes which are performed when the +.Sy multihost +pool property is on. +This is one of the factors used to determine the +length of the activity check during import. +.Pp +The multihost write period is +.Sy zfs_multihost_interval / leaf-vdevs . +On average a multihost write will be issued for each leaf vdev +every +.Sy zfs_multihost_interval +milliseconds. +In practice, the observed period can vary with the I/O load +and this observed value is the delay which is stored in the uberblock. +. +.It Sy zfs_multihost_import_intervals Ns = Ns Sy 20 Pq uint +Used to control the duration of the activity test on import. +Smaller values of +.Sy zfs_multihost_import_intervals +will reduce the import time but increase +the risk of failing to detect an active pool. +The total activity check time is never allowed to drop below one second. +.Pp +On import the activity check waits a minimum amount of time determined by +.Sy zfs_multihost_interval * zfs_multihost_import_intervals , +or the same product computed on the host which last had the pool imported, +whichever is greater. +The activity check time may be further extended if the value of MMP +delay found in the best uberblock indicates actual multihost updates happened +at longer intervals than +.Sy zfs_multihost_interval . +A minimum of +.Em 100ms +is enforced. +.Pp +.Sy 0 No is equivalent to Sy 1 . +. +.It Sy zfs_multihost_fail_intervals Ns = Ns Sy 10 Pq uint +Controls the behavior of the pool when multihost write failures or delays are +detected. +.Pp +When +.Sy 0 , +multihost write failures or delays are ignored. +The failures will still be reported to the ZED which depending on +its configuration may take action such as suspending the pool or offlining a +device. +.Pp +Otherwise, the pool will be suspended if +.Sy zfs_multihost_fail_intervals * zfs_multihost_interval +milliseconds pass without a successful MMP write. +This guarantees the activity test will see MMP writes if the pool is imported. +.Sy 1 No is equivalent to Sy 2 ; +this is necessary to prevent the pool from being suspended +due to normal, small I/O latency variations. +. 
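+.Pp
+A worked example with the defaults, purely for illustration:
+.Bd -literal -compact
+# Suspension window without a successful MMP write:
+#   zfs_multihost_fail_intervals * zfs_multihost_interval
+#   = 10 * 1000ms = 10s
+cat /sys/module/zfs/parameters/zfs_multihost_fail_intervals
+cat /sys/module/zfs/parameters/zfs_multihost_interval
+.Ed
+.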
+.It Sy zfs_no_scrub_io Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to disable scrub I/O.
+This results in scrubs not actually scrubbing data and
+simply doing a metadata crawl of the pool instead.
+.
+.It Sy zfs_no_scrub_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to disable block prefetching for scrubs.
+.
+.It Sy zfs_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable cache flush operations on disks when writing.
+Setting this will cause pool corruption on power loss
+if a volatile out-of-order write cache is enabled.
+.
+.It Sy zfs_nopwrite_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Allow no-operation writes.
+The occurrence of nopwrites will further depend on other pool properties
+.Pq i.a. the checksumming and compression algorithms .
+.
+.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Enable forcing TXG sync to find holes.
+When enabled, forces ZFS to act like prior versions when
+.Sy SEEK_HOLE No or Sy SEEK_DATA
+flags are used, which, when a dnode is dirty,
+causes TXGs to be synced so that this data can be found.
+.
+.It Sy zfs_pd_bytes_max Ns = Ns Sy 52428800 Ns B Po 50MB Pc Pq int
+The number of bytes which should be prefetched during a pool traversal, like
+.Nm zfs Cm send
+or other data crawling operations.
+.
+.It Sy zfs_traverse_indirect_prefetch_limit Ns = Ns Sy 32 Pq int
+The number of blocks pointed to by an indirect (non-L0) block which should be
+prefetched during a pool traversal, like
+.Nm zfs Cm send
+or other data crawling operations.
+.
+.It Sy zfs_per_txg_dirty_frees_percent Ns = Ns Sy 5 Ns % Pq ulong
+Control percentage of dirtied indirect blocks from frees allowed into one TXG.
+After this threshold is crossed, additional frees will wait until the next TXG.
+.Sy 0 No disables this throttle.
+.
+.It Sy zfs_prefetch_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable predictive prefetch.
+Note that it leaves "prescient" prefetch (e.g. for
+.Nm zfs Cm send )
+intact.
+Unlike predictive prefetch, prescient prefetch never issues I/O
+that ends up not being needed, so it can't hurt performance.
+.
+.It Sy zfs_qat_checksum_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable QAT hardware acceleration for SHA256 checksums.
+May be unset after the ZFS modules have been loaded to initialize the QAT
+hardware as long as support is compiled in and the QAT driver is present.
+.
+.It Sy zfs_qat_compress_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable QAT hardware acceleration for gzip compression.
+May be unset after the ZFS modules have been loaded to initialize the QAT
+hardware as long as support is compiled in and the QAT driver is present.
+.
+.It Sy zfs_qat_encrypt_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable QAT hardware acceleration for AES-GCM encryption.
+May be unset after the ZFS modules have been loaded to initialize the QAT
+hardware as long as support is compiled in and the QAT driver is present.
+.
+.It Sy zfs_vnops_read_chunk_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq long
+Bytes to read per chunk.
+.
+.It Sy zfs_read_history Ns = Ns Sy 0 Pq int
+Historical statistics for this many latest reads will be available in
+.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /reads .
+.
+.It Sy zfs_read_history_hits Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Include cache hits in read history.
+.
+.It Sy zfs_rebuild_max_segment Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+Maximum read segment size to issue when sequentially resilvering a
+top-level vdev.
+.
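+.Pp
+As a hedged example, a device that copes poorly with large sequential reads
+might warrant a smaller segment size (value illustrative only):
+.Bd -literal -compact
+# Halve the read segment size used by sequential resilvers.
+echo 524288 > /sys/module/zfs/parameters/zfs_rebuild_max_segment
+.Ed
+.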
+.It Sy zfs_rebuild_scrub_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Automatically start a pool scrub when the last active sequential resilver
+completes in order to verify the checksums of all blocks which have been
+resilvered.
+This is enabled by default and strongly recommended.
+.
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong
+Maximum amount of I/O that can be concurrently issued for a sequential
+resilver per leaf device, given in bytes.
+.
+.It Sy zfs_reconstruct_indirect_combinations_max Ns = Ns Sy 4096 Pq int
+If an indirect split block contains more than this many possible unique
+combinations when being reconstructed, consider it too computationally
+expensive to check them all.
+Instead, try at most this many randomly selected
+combinations each time the block is accessed.
+This allows all segment copies to participate fairly
+in the reconstruction when all combinations
+cannot be checked and prevents repeated use of one bad copy.
+.
+.It Sy zfs_recover Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to attempt to recover from fatal errors.
+This should only be used as a last resort,
+as it typically results in leaked space, or worse.
+.
+.It Sy zfs_removal_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Ignore hard I/O errors during device removal.
+When set, if a device encounters a hard I/O error during the removal process,
+the removal will not be cancelled.
+This can result in a normally recoverable block becoming permanently damaged
+and is hence not recommended.
+This should only be used as a last resort when the
+pool cannot be returned to a healthy state prior to removing the device.
+.
+.It Sy zfs_removal_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq int
+This is used by the test suite so that it can ensure that certain actions
+happen while in the middle of a removal.
+.
+.It Sy zfs_remove_max_segment Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+The largest contiguous segment that we will attempt to allocate when removing
+a device.
+If there is a performance problem with attempting to allocate large blocks,
+consider decreasing this.
+The default value is also the maximum.
+.
+.It Sy zfs_resilver_disable_defer Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Ignore the
+.Sy resilver_defer
+feature, causing an operation that would start a resilver to
+immediately restart the one in progress.
+.
+.It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3s Pc Pq int
+Resilvers are processed by the sync thread.
+While resilvering, it will spend at least this much time
+working on a resilver between TXG flushes.
+.
+.It Sy zfs_scan_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If set, remove the DTL (dirty time list) upon completion of a pool scan (scrub),
+even if there were unrepairable errors.
+Intended to be used during pool repair or recovery to
+stop resilvering when the pool is next imported.
+.
+.It Sy zfs_scrub_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq int
+Scrubs are processed by the sync thread.
+While scrubbing, it will spend at least this much time
+working on a scrub between TXG flushes.
+.
+.It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2h Pc Pq int
+To preserve progress across reboots, the sequential scan algorithm periodically
+needs to stop metadata scanning and issue all the verification I/O to disk.
+The frequency of this flushing is determined by this tunable.
+.
+.It Sy zfs_scan_fill_weight Ns = Ns Sy 3 Pq int
+This tunable affects how scrub and resilver I/O segments are ordered.
+A higher number indicates that we care more about how filled in a segment is,
+while a lower number indicates we care more about the size of the extent without
+considering the gaps within a segment.
+This value is only tunable upon module insertion.
+Changing the value afterwards will have no effect on scrub or resilver performance.
+.
+.It Sy zfs_scan_issue_strategy Ns = Ns Sy 0 Pq int
+Determines the order that data will be verified while scrubbing or resilvering:
+.Bl -tag -compact -offset 4n -width "a"
+.It Sy 1
+Data will be verified as sequentially as possible, given the
+amount of memory reserved for scrubbing
+.Pq see Sy zfs_scan_mem_lim_fact .
+This may improve scrub performance if the pool's data is very fragmented.
+.It Sy 2
+The largest mostly-contiguous chunk of found data will be verified first.
+By deferring scrubbing of small segments, we may later find adjacent data
+to coalesce and increase the segment size.
+.It Sy 0
+.No Use strategy Sy 1 No during normal verification
+.No and strategy Sy 2 No while taking a checkpoint.
+.El
+.
+.It Sy zfs_scan_legacy Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If unset, indicates that scrubs and resilvers will gather metadata in
+memory before issuing sequential I/O.
+Otherwise indicates that the legacy algorithm will be used,
+where I/O is initiated as soon as it is discovered.
+Unsetting will not affect scrubs or resilvers that are already in progress.
+.
+.It Sy zfs_scan_max_ext_gap Ns = Ns Sy 2097152 Ns B Po 2MB Pc Pq int
+Sets the largest gap in bytes between scrub/resilver I/O operations
+that will still be considered sequential for sorting purposes.
+Changing this value will not
+affect scrubs or resilvers that are already in progress.
+.
+.It Sy zfs_scan_mem_lim_fact Ns = Ns Sy 20 Ns ^-1 Pq int
+Maximum fraction of RAM used for I/O sorting by the sequential scan algorithm.
+This tunable determines the hard limit for I/O sorting memory usage.
+When the hard limit is reached, we stop scanning metadata and start issuing
+data verification I/O.
+This is done until we get below the soft limit.
+.
+.It Sy zfs_scan_mem_lim_soft_fact Ns = Ns Sy 20 Ns ^-1 Pq int
+The fraction of the hard limit used to determine the soft limit for I/O sorting
+by the sequential scan algorithm.
+When we cross this limit from below no action is taken.
+When we cross this limit from above it is because we are issuing verification I/O.
+In this case (unless the metadata scan is done) we stop issuing verification I/O
+and start scanning metadata again until we get to the hard limit.
+.
+.It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Enforce tight memory limits on pool scans when a sequential scan is in progress.
+When disabled, the memory limit may be exceeded by fast disks.
+.
+.It Sy zfs_scan_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Freezes a scrub/resilver in progress without actually pausing it.
+Intended for testing/debugging.
+.
+.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int
+Maximum amount of data that can be concurrently issued at once for scrubs and
+resilvers per leaf device, given in bytes.
+.
+.It Sy zfs_send_corrupt_data Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow sending of corrupt data (ignore read/checksum errors when sending).
+.
+.It Sy zfs_send_unmodified_spill_blocks Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Include unmodified spill blocks in the send stream.
+Under certain circumstances, previous versions of ZFS could incorrectly
+remove the spill block from an existing object.
+Including unmodified copies of the spill blocks creates a backwards-compatible
+stream which will recreate a spill block if it was incorrectly removed.
+.
+.It Sy zfs_send_no_prefetch_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
+The fill fraction of the
+.Nm zfs Cm send
+internal queues.
+The fill fraction controls the timing with which internal threads are woken up.
+.
+.It Sy zfs_send_no_prefetch_queue_length Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+The maximum number of bytes allowed in
+.Nm zfs Cm send Ns 's
+internal queues.
+.
+.It Sy zfs_send_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
+The fill fraction of the
+.Nm zfs Cm send
+prefetch queue.
+The fill fraction controls the timing with which internal threads are woken up.
+.
+.It Sy zfs_send_queue_length Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+The maximum number of bytes that will be prefetched by
+.Nm zfs Cm send .
+This value must be at least twice the maximum block size in use.
+.
+.It Sy zfs_recv_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
+The fill fraction of the
+.Nm zfs Cm receive
+queue.
+The fill fraction controls the timing with which internal threads are woken up.
+.
+.It Sy zfs_recv_queue_length Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+The maximum number of bytes allowed in the
+.Nm zfs Cm receive
+queue.
+This value must be at least twice the maximum block size in use.
+.
+.It Sy zfs_recv_write_batch_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+The maximum amount of data, in bytes, that
+.Nm zfs Cm receive
+will write in one DMU transaction.
+This is the uncompressed size, even when receiving a compressed send stream.
+This setting will not reduce the write size below a single block.
+Capped at a maximum of
+.Sy 32MB .
+.
+.It Sy zfs_override_estimate_recordsize Ns = Ns Sy 0 Ns | Ns 1 Pq ulong
+Setting this variable overrides the default logic for estimating block
+sizes when doing a
+.Nm zfs Cm send .
+The default heuristic is that the average block size
+will be the current recordsize.
+Override this value if most data in your dataset is not of that size
+and you require accurate zfs send size estimates.
+.
+.It Sy zfs_sync_pass_deferred_free Ns = Ns Sy 2 Pq int
+Flushing of data to disk is done in passes.
+Defer frees starting in this pass.
+.
+.It Sy zfs_spa_discard_memory_limit Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+Maximum memory used for prefetching a checkpoint's space map on each
+vdev while discarding the checkpoint.
+.
+.It Sy zfs_special_class_metadata_reserve_pct Ns = Ns Sy 25 Ns % Pq int
+Only allow small data blocks to be allocated on the special and dedup vdev
+types when the available free space percentage on these vdevs exceeds this value.
+This ensures reserved space is available for pool metadata as the
+special vdevs approach capacity.
+.
+.It Sy zfs_sync_pass_dont_compress Ns = Ns Sy 8 Pq int
+Starting in this sync pass, disable compression (including of metadata).
+With the default setting, in practice, we don't have this many sync passes,
+so this has no effect.
+.Pp
+The original intent was that disabling compression would help the sync passes
+to converge.
+However, in practice, disabling compression increases
+the average number of sync passes, because when we turn compression off,
+many blocks' size will change, and thus we have to re-allocate
+(not overwrite) them.
+It also increases the number of
+.Em 128kB
+allocations (e.g. for indirect blocks and spacemaps)
+because these will not be compressed.
+The
+.Em 128kB
+allocations are especially detrimental to performance
+on highly fragmented systems, which may have very few free segments of this size,
+and may need to load new metaslabs to satisfy these allocations.
+.
+.It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq int
+Rewrite new block pointers starting in this pass.
+.
+.It Sy zfs_sync_taskq_batch_pct Ns = Ns Sy 75 Ns % Pq int
+This controls the number of threads used by
+.Sy dp_sync_taskq .
+The default value of
+.Sy 75%
+will create a maximum of one thread per CPU.
+.
+.It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128MB Pc Pq uint
+Maximum size of TRIM command.
+Larger ranges will be split into chunks no larger than this value before issuing.
+.
+.It Sy zfs_trim_extent_bytes_min Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq uint
+Minimum size of TRIM commands.
+TRIM ranges smaller than this will be skipped,
+unless they're part of a larger range which was chunked.
+This is done because it's common for these small TRIMs
+to negatively impact overall performance.
+.
+.It Sy zfs_trim_metaslab_skip Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Skip uninitialized metaslabs during the TRIM process.
+This option is useful for pools constructed from large thinly-provisioned devices
+where TRIM operations are slow.
+As a pool ages, an increasing fraction of the pool's metaslabs
+will be initialized, progressively degrading the usefulness of this option.
+This setting is stored when starting a manual TRIM and will
+persist for the duration of the requested TRIM.
+.
+.It Sy zfs_trim_queue_limit Ns = Ns Sy 10 Pq uint
+Maximum number of queued TRIMs outstanding per leaf vdev.
+The number of concurrent TRIM commands issued to the device is controlled by
+.Sy zfs_vdev_trim_min_active No and Sy zfs_vdev_trim_max_active .
+.
+.It Sy zfs_trim_txg_batch Ns = Ns Sy 32 Pq uint
+The number of transaction groups' worth of frees which should be aggregated
+before TRIM operations are issued to the device.
+This setting represents a trade-off between issuing larger,
+more efficient TRIM operations and the delay
+before the recently trimmed space is available for use by the device.
+.Pp
+Increasing this value will allow frees to be aggregated for a longer time.
+This will result in larger TRIM operations and potentially increased memory usage.
+Decreasing this value will have the opposite effect.
+The default of
+.Sy 32
+was determined to be a reasonable compromise.
+.
+.It Sy zfs_txg_history Ns = Ns Sy 0 Pq int
+Historical statistics for this many latest TXGs will be available in
+.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /TXGs .
+.
+.It Sy zfs_txg_timeout Ns = Ns Sy 5 Ns s Pq int
+Flush dirty data to disk at least every this many seconds (maximum TXG duration).
+.
+.It Sy zfs_vdev_aggregate_trim Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow TRIM I/Os to be aggregated.
+This is normally not helpful because the extents to be trimmed
+will have already been aggregated by the metaslab.
+This option is provided for debugging and performance analysis.
+.
+.It Sy zfs_vdev_aggregation_limit Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+Max vdev I/O aggregation size.
+.
+.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq int
+Max vdev I/O aggregation size for non-rotating media.
+.
+.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64kB Pc Pq int
+Shift size to inflate reads to.
+.
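+.Pp
+The shift-to-size relationship, shown with shell arithmetic for illustration:
+.Bd -literal -compact
+# Reads are inflated to 1 << zfs_vdev_cache_bshift bytes; with the default:
+echo $((1 << 16))    # 65536 bytes, i.e. 64kB
+.Ed
+.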
+.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16kB Pc Pq int
+Inflate reads smaller than this value to meet the
+.Sy zfs_vdev_cache_bshift
+size
+.Pq default Sy 64kB .
+.
+.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq int
+Total size of the per-disk cache in bytes.
+.Pp
+Currently this feature is disabled, as it has been found to not be helpful
+for performance and in some cases harmful.
+.
+.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
+A number by which the balancing algorithm increments the load calculation
+when an I/O operation immediately follows its predecessor on rotational vdevs,
+for the purpose of selecting the least busy mirror member.
+.
+.It Sy zfs_vdev_mirror_rotating_seek_inc Ns = Ns Sy 5 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member when an I/O operation
+lacks locality as defined by
+.Sy zfs_vdev_mirror_rotating_seek_offset .
+Operations within this window that do not immediately follow the previous
+operation are incremented by half of this value.
+.
+.It Sy zfs_vdev_mirror_rotating_seek_offset Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+The maximum distance from the last queued I/O operation within which
+the balancing algorithm considers an operation to have locality.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_mirror_non_rotating_inc Ns = Ns Sy 0 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member on non-rotational vdevs
+when I/O operations do not immediately follow one another.
+.
+.It Sy zfs_vdev_mirror_non_rotating_seek_inc Ns = Ns Sy 1 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member when an I/O operation lacks
+locality as defined by
+.Sy zfs_vdev_mirror_rotating_seek_offset .
+Operations within this window that do not immediately follow the previous
+operation are incremented by half of this value.
+.
+.It Sy zfs_vdev_read_gap_limit Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int
+Aggregate read I/O operations if the on-disk gap between them is within this
+threshold.
+.
+.It Sy zfs_vdev_write_gap_limit Ns = Ns Sy 4096 Ns B Po 4kB Pc Pq int
+Aggregate write I/O operations if the on-disk gap between them is within this
+threshold.
+.
+.It Sy zfs_vdev_raidz_impl Ns = Ns Sy fastest Pq string
+Select the raidz parity implementation to use.
+.Pp
+Variants that don't depend on CPU-specific features
+may be selected on module load, as they are supported on all systems.
+The remaining options may only be set after the module is loaded,
+as they are available only if the implementations are compiled in
+and supported on the running system.
+.Pp
+Once the module is loaded,
+.Pa /sys/module/zfs/parameters/zfs_vdev_raidz_impl
+will show the available options,
+with the currently selected one enclosed in square brackets.
+.Pp
+.TS
+lb l l .
+fastest	selected by built-in benchmark
+original	original implementation
+scalar	scalar implementation
+sse2	SSE2 instruction set	64-bit x86
+ssse3	SSSE3 instruction set	64-bit x86
+avx2	AVX2 instruction set	64-bit x86
+avx512f	AVX512F instruction set	64-bit x86
+avx512bw	AVX512F & AVX512BW instruction sets	64-bit x86
+aarch64_neon	NEON	Aarch64/64-bit ARMv8
+aarch64_neonx2	NEON with more unrolling	Aarch64/64-bit ARMv8
+powerpc_altivec	Altivec	PowerPC
+.TE
+.
+.It Sy zfs_vdev_scheduler Pq charp
+.Sy DEPRECATED .
+Prints a warning to the kernel log for compatibility.
+.
+.It Sy zfs_zevent_len_max Ns = Ns Sy 512 Pq int
+Max event queue length.
+Events in the queue can be viewed with
+.Xr zpool-events 8 .
+.
+.It Sy zfs_zevent_retain_max Ns = Ns Sy 2000 Pq int
+Maximum recent zevent records to retain for duplicate checking.
+Setting this to
+.Sy 0
+disables duplicate detection.
+.
+.It Sy zfs_zevent_retain_expire_secs Ns = Ns Sy 900 Ns s Po 15min Pc Pq int
+Lifespan for a recent ereport that was retained for duplicate checking.
+.
+.It Sy zfs_zil_clean_taskq_maxalloc Ns = Ns Sy 1048576 Pq int
+The maximum number of taskq entries that are allowed to be cached.
+When this limit is exceeded, transaction records (itxs)
+will be cleaned synchronously.
+.
+.It Sy zfs_zil_clean_taskq_minalloc Ns = Ns Sy 1024 Pq int
+The number of taskq entries that are pre-populated when the taskq is first
+created and are immediately available for use.
+.
+.It Sy zfs_zil_clean_taskq_nthr_pct Ns = Ns Sy 100 Ns % Pq int
+This controls the number of threads used by
+.Sy dp_zil_clean_taskq .
+The default value of
+.Sy 100%
+will create a maximum of one thread per CPU.
+.
+.It Sy zil_maxblocksize Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq int
+This sets the maximum block size used by the ZIL.
+On very fragmented pools, lowering this
+.Pq typically to Sy 36kB
+can improve performance.
+.
+.It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable the cache flush commands that are normally sent to disk by
+the ZIL after an LWB write has completed.
+Setting this will cause ZIL corruption on power loss
+if a volatile out-of-order write cache is enabled.
+.
+.It Sy zil_replay_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable intent logging replay.
+Can be set to aid recovery from a corrupted ZIL.
+.
+.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768kB Pc Pq ulong
+Limit SLOG write size per commit executed with synchronous priority.
+Any writes above that will be executed with lower (asynchronous) priority
+to limit potential SLOG device abuse by a single active ZIL writer.
+.
+.It Sy zfs_embedded_slog_min_ms Ns = Ns Sy 64 Pq int
+Usually, one metaslab from each normal-class vdev is dedicated for use by
+the ZIL to log synchronous writes.
+However, if there are fewer than
+.Sy zfs_embedded_slog_min_ms
+metaslabs in the vdev, this functionality is disabled.
+This ensures that we don't set aside an unreasonable amount of space for the ZIL.
+.
+.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If non-zero, the zio deadman will produce debugging messages
+.Pq see Sy zfs_dbgmsg_enable
+for all zios, rather than only for leaf zios possessing a vdev.
+This is meant to be used by developers to gain
+diagnostic information for hang conditions which don't involve a mutex
+or other locking primitive: typically conditions in which a thread in
+the zio pipeline is looping indefinitely.
+.
+.It Sy zio_slow_io_ms Ns = Ns Sy 30000 Ns ms Po 30s Pc Pq int
+When an I/O operation takes more than this much time to complete,
+it's marked as slow.
+Each slow operation causes a delay zevent.
+Slow I/O counters can be seen with
+.Nm zpool Cm status Fl s .
+.
+.It Sy zio_dva_throttle_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Throttle block allocations in the I/O pipeline.
+This allows for dynamic allocation distribution when devices are imbalanced.
+When enabled, the maximum number of pending allocations per top-level vdev
+is limited by
+.Sy zfs_vdev_queue_depth_pct .
+.
+.It Sy zio_requeue_io_start_cut_in_line Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Prioritize requeued I/O.
+.
+.It Sy zio_taskq_batch_pct Ns = Ns Sy 80 Ns % Pq uint
+Percentage of online CPUs which will run a worker thread for I/O.
+These workers are responsible for I/O work such as compression and
+checksum calculations.
+A fractional number of CPUs will be rounded down.
+.Pp
+The default value of
+.Sy 80%
+was chosen to avoid using all CPUs which can result in
+latency issues and inconsistent application performance,
+especially when slower compression and/or checksumming is enabled.
+.
+.It Sy zio_taskq_batch_tpq Ns = Ns Sy 0 Pq uint
+Number of worker threads per taskq.
+Lower values improve I/O ordering and CPU utilization,
+while higher values reduce lock contention.
+.Pp
+If
+.Sy 0 ,
+generate a system-dependent value close to 6 threads per taskq.
+.
+.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Do not create zvol device nodes.
+This may slightly improve startup time on
+systems with a very large number of zvols.
+.
+.It Sy zvol_major Ns = Ns Sy 230 Pq uint
+Major number for zvol block devices.
+.
+.It Sy zvol_max_discard_blocks Ns = Ns Sy 16384 Pq ulong
+Discard (TRIM) operations done on zvols will be done in batches of this
+many blocks, where block size is determined by the
+.Sy volblocksize
+property of a zvol.
+.
+.It Sy zvol_prefetch_bytes Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq uint
+When adding a zvol to the system, prefetch this many bytes
+from the start and end of the volume.
+Prefetching these regions of the volume is desirable,
+because they are likely to be accessed immediately by
+.Xr blkid 8
+or the kernel partitioner.
+.
+.It Sy zvol_request_sync Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When processing I/O requests for a zvol, submit them synchronously.
+This effectively limits the queue depth to
+.Em 1
+for each I/O submitter.
+When unset, requests are handled asynchronously by a thread pool.
+The number of requests which can be handled concurrently is controlled by
+.Sy zvol_threads .
+.
+.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
+Max number of threads which can handle zvol I/O requests concurrently.
+.
+.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
+Defines zvol block device behaviour when
+.Sy volmode Ns = Ns Sy default :
+.Bl -tag -compact -offset 4n -width "a"
+.It Sy 1
+.No equivalent to Sy full
+.It Sy 2
+.No equivalent to Sy dev
+.It Sy 3
+.No equivalent to Sy none
+.El
+.El
+.
+.Sh ZFS I/O SCHEDULER
+ZFS issues I/O operations to leaf vdevs to satisfy and complete I/O requests.
+The scheduler determines when and in what order those operations are issued.
+The scheduler divides operations into five I/O classes,
+prioritized in the following order: sync read, sync write, async read,
+async write, and scrub/resilver.
+Each queue defines the minimum and maximum number of concurrent operations
+that may be issued to the device.
+In addition, the device has an aggregate maximum,
+.Sy zfs_vdev_max_active .
+Note that the sum of the per-queue minima must not exceed the aggregate maximum.
+If the sum of the per-queue maxima exceeds the aggregate maximum,
+then the number of active operations may reach
+.Sy zfs_vdev_max_active ,
+in which case no further operations will be issued,
+regardless of whether all per-queue minima have been met.
+.Pp
+For many physical devices, throughput increases with the number of
+concurrent operations, but latency typically suffers.
+Furthermore, physical devices typically have a limit
+at which more concurrent operations have no
+effect on throughput or can actually cause it to decrease.
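+.Pp
+As a rough illustration of how these limits interact (a simplified C sketch
+with hypothetical names, not the actual kernel implementation), both the
+per-class maximum and the aggregate maximum must leave headroom before
+another operation of a given class may be issued:
+.Bd -literal
+/* Hypothetical sketch: per-class limits under an aggregate cap. */
+enum zio_class { SYNC_READ, SYNC_WRITE, ASYNC_READ, ASYNC_WRITE,
+        SCRUB, NCLASSES };
+
+struct limits { int min_active; int max_active; };
+
+/* Can one more operation of class c be issued to this vdev? */
+static int
+can_issue(const struct limits lim[NCLASSES], const int active[NCLASSES],
+    int total_active, int aggregate_max /* zfs_vdev_max_active */, int c)
+{
+        if (total_active >= aggregate_max)
+                return (0);     /* device-wide cap reached */
+        return (active[c] < lim[c].max_active);
+}
+.Ed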
+.Pp
+The scheduler selects the next operation to issue by first looking for an
+I/O class whose minimum has not been satisfied.
+Once all are satisfied and the aggregate maximum has not been hit,
+the scheduler looks for classes whose maximum has not been satisfied.
+Iteration through the I/O classes is done in the order specified above.
+No further operations are issued
+if the aggregate maximum number of concurrent operations has been hit,
+or if there are no operations queued for an I/O class that has not hit its maximum.
+Every time an I/O operation is queued or an operation completes,
+the scheduler looks for new operations to issue.
+.Pp
+In general, smaller
+.Sy max_active Ns s
+will lead to lower latency of synchronous operations.
+Larger
+.Sy max_active Ns s
+may lead to higher overall throughput, depending on underlying storage.
+.Pp
+The ratio of the queues'
+.Sy max_active Ns s
+determines the balance of performance between reads, writes, and scrubs.
+For example, increasing
+.Sy zfs_vdev_scrub_max_active
+will cause the scrub or resilver to complete more quickly,
+but reads and writes to have higher latency and lower throughput.
+.Pp
+All I/O classes have a fixed maximum number of outstanding operations,
+except for the async write class.
+Asynchronous writes represent the data that is committed to stable storage
+during the syncing stage for transaction groups.
+Transaction groups enter the syncing state periodically,
+so the number of queued async writes will quickly burst up
+and then bleed down to zero.
+Rather than servicing them as quickly as possible,
+the I/O scheduler changes the maximum number of active async write operations
+according to the amount of dirty data in the pool.
+Since both throughput and latency typically increase with the number of
+concurrent operations issued to physical devices, reducing the
+burstiness in the number of concurrent operations also stabilizes the
+response time of operations from other – and in particular synchronous – queues.
+In broad strokes, the I/O scheduler will issue more concurrent operations
+from the async write queue as there's more dirty data in the pool.
+.
+.Ss Async Writes
+The number of concurrent operations issued for the async write I/O class
+follows a piece-wise linear function defined by a few adjustable points:
+.Bd -literal
+       |              o---------| <-- \fBzfs_vdev_async_write_max_active\fP
+  ^    |             /^         |
+  |    |            / |         |
+active |           /  |         |
+ I/O   |          /   |         |
+count  |         /    |         |
+       |        /     |         |
+       |-------o      |         | <-- \fBzfs_vdev_async_write_min_active\fP
+      0|_______^______|_________|
+       0%      |      |         100% of \fBzfs_dirty_data_max\fP
+               |      |
+               |      `-- \fBzfs_vdev_async_write_active_max_dirty_percent\fP
+               `--------- \fBzfs_vdev_async_write_active_min_dirty_percent\fP
+.Ed
+.Pp
+Until the amount of dirty data exceeds a minimum percentage of the dirty
+data allowed in the pool, the I/O scheduler will limit the number of
+concurrent operations to the minimum.
+As that threshold is crossed, the number of concurrent operations issued
+increases linearly to the maximum at the specified maximum percentage
+of the dirty data allowed in the pool.
+.Pp
+Ideally, the amount of dirty data on a busy pool will stay in the sloped
+part of the function between
+.Sy zfs_vdev_async_write_active_min_dirty_percent
+and
+.Sy zfs_vdev_async_write_active_max_dirty_percent .
+If it exceeds the maximum percentage,
+this indicates that the rate of incoming data is
+greater than the rate that the backend storage can handle.
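+.Pp
+The function above can be read as the following C sketch (an illustration
+using hypothetical parameter names that mirror the tunables; not the
+in-kernel code), which linearly interpolates between the two fixed points:
+.Bd -literal
+/* Hypothetical sketch of the piece-wise linear scaling shown above. */
+static int
+async_write_max_active(unsigned long dirty, unsigned long dirty_max,
+    int min_pct,    /* zfs_vdev_async_write_active_min_dirty_percent */
+    int max_pct,    /* zfs_vdev_async_write_active_max_dirty_percent */
+    int min_active, /* zfs_vdev_async_write_min_active */
+    int max_active) /* zfs_vdev_async_write_max_active */
+{
+        int pct = (int)(dirty * 100 / dirty_max);
+
+        if (pct <= min_pct)
+                return (min_active);
+        if (pct >= max_pct)
+                return (max_active);
+        /* Linear interpolation between the two fixed points. */
+        return (min_active + (max_active - min_active) *
+            (pct - min_pct) / (max_pct - min_pct));
+}
+.Ed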
+.Pp
+When the maximum percentage is exceeded, we must further throttle incoming
+writes, as described in the next section.
+.
+.Sh ZFS TRANSACTION DELAY
+We delay transactions when we've determined that the backend storage
+isn't able to accommodate the rate of incoming writes.
+.Pp
+If there is already a transaction waiting, we delay relative to when
+that transaction will finish waiting.
+This way the calculated delay time
+is independent of the number of threads concurrently executing transactions.
+.Pp
+If we are the only waiter, wait relative to when the transaction started,
+rather than the current time.
+This credits the transaction for "time already served",
+e.g. reading indirect blocks.
+.Pp
+The minimum time for a transaction to take is calculated as
+.Dl min_time = min( Ns Sy zfs_delay_scale No * (dirty - min) / (max - dirty), 100ms)
+.Pp
+The delay has two degrees of freedom that can be adjusted via tunables.
+The percentage of dirty data at which we start to delay is defined by
+.Sy zfs_delay_min_dirty_percent .
+This should typically be at or above
+.Sy zfs_vdev_async_write_active_max_dirty_percent ,
+so that we only start to delay after writing at full speed
+has failed to keep up with the incoming write rate.
+The scale of the curve is defined by
+.Sy zfs_delay_scale .
+Roughly speaking, this variable determines the amount of delay at the midpoint of the curve.
+.Bd -literal
+delay
+ 10ms +-------------------------------------------------------------*+
+      |                                                             *|
+  9ms +                                                             *+
+      |                                                             *|
+  8ms +                                                             *+
+      |                                                            * |
+  7ms +                                                            * +
+      |                                                            * |
+  6ms +                                                            * +
+      |                                                           *  |
+  5ms +                                                           *  +
+      |                                                           *  |
+  4ms +                                                          *   +
+      |                                                          *   |
+  3ms +                                                          *   +
+      |                                                         *    |
+  2ms +                                              (midpoint) *    +
+      |                                              |        **     |
+  1ms +                                              v     ***       +
+      |             \fBzfs_delay_scale\fP ---------->     ********          |
+    0 +-------------------------------------*********----------------+
+      0%                  <- \fBzfs_dirty_data_max\fP ->               100%
+.Ed
+.Pp
+Note that, since the delay is added to the outstanding time remaining on the
+most recent transaction, it's effectively the inverse of IOPS.
+Here, the midpoint of
+.Em 500us
+translates to
+.Em 2000 IOPS .
+The shape of the curve
+was chosen such that small changes in the amount of accumulated dirty data
+in the first three quarters of the curve yield relatively small differences
+in the amount of delay.
+.Pp
+The effects can be easier to understand when the amount of delay is
+represented on a logarithmic scale:
+.Bd -literal
+delay
+100ms +-------------------------------------------------------------++
+      +                                                              +
+      |                                                              |
+      +                                                             *+
+ 10ms +                                                             *+
+      +                                                           ** +
+      |                                              (midpoint)  **  |
+      +                                              |      **      +
+  1ms +                                              v  ****         +
+      +     \fBzfs_delay_scale\fP ---------->      *****             +
+      |                                  ****                        |
+      +                              ****                            +
+100us +                            **                                +
+      +                           *                                  +
+      |                          *                                   |
+      +                         *                                    +
+ 10us +                        *                                     +
+      +                                                              +
+      |                                                              |
+      +                                                              +
+      +--------------------------------------------------------------+
+      0%                  <- \fBzfs_dirty_data_max\fP ->               100%
+.Ed
+.Pp
+Note here that only as the amount of dirty data approaches its limit does
+the delay start to increase rapidly.
+The goal of a properly tuned system should be to keep the amount of dirty data
+out of that range by first ensuring that the appropriate limits are set
+for the I/O scheduler to reach optimal throughput on the back-end storage,
+and then by changing the value of
+.Sy zfs_delay_scale
+to increase the steepness of the curve.
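+.Pp
+For concreteness, the delay formula above can be sketched in C
+(a hypothetical helper assuming nanosecond units for
+.Sy zfs_delay_scale ,
+not the in-kernel implementation):
+.Bd -literal
+/* Hypothetical sketch of the transaction delay curve described above. */
+static unsigned long
+tx_delay_ns(unsigned long dirty, unsigned long dirty_max,
+    int delay_min_pct,         /* zfs_delay_min_dirty_percent */
+    unsigned long delay_scale) /* zfs_delay_scale, assumed nanoseconds */
+{
+        const unsigned long cap = 100UL * 1000 * 1000; /* 100ms ceiling */
+        unsigned long min = dirty_max * delay_min_pct / 100;
+        unsigned long d;
+
+        if (dirty <= min)
+                return (0);
+        if (dirty >= dirty_max)
+                return (cap);
+        /* min_time = zfs_delay_scale * (dirty - min) / (max - dirty) */
+        d = delay_scale * (dirty - min) / (dirty_max - dirty);
+        return (d > cap ? cap : d);
+}
+.Ed
+.Pp
+At the point where (dirty - min) equals (max - dirty), the quotient is 1 and
+the delay equals
+.Sy zfs_delay_scale
+itself, which is how a midpoint of
+.Em 500us
+in the figures above corresponds to roughly
+.Em 2000 IOPS .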
diff --git a/man/man5/spl-module-parameters.5 b/man/man5/spl-module-parameters.5 deleted file mode 100644 index 88652a75ae43..000000000000 --- a/man/man5/spl-module-parameters.5 +++ /dev/null @@ -1,196 +0,0 @@ -.\" -.\" The contents of this file are subject to the terms of the Common Development -.\" and Distribution License (the "License"). You may not use this file except -.\" in compliance with the License. You can obtain a copy of the license at -.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" -.\" See the License for the specific language governing permissions and -.\" limitations under the License. When distributing Covered Code, include this -.\" CDDL HEADER in each file and include the License file at -.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this -.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your -.\" own identifying information: -.\" Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" Copyright 2013 Turbo Fredriksson . All rights reserved. -.\" -.Dd August 24, 2020 -.Dt SPL-MODULE-PARAMETERS 5 -.Os -. -.Sh NAME -.Nm spl-module-parameters -.Nd parameters of the SPL kernel module -. -.Sh DESCRIPTION -.Bl -tag -width Ds -.It Sy spl_kmem_cache_kmem_threads Ns = Ns Sy 4 Pq uint -The number of threads created for the spl_kmem_cache task queue. -This task queue is responsible for allocating new slabs -for use by the kmem caches. -For the majority of systems and workloads only a small number of threads are -required. -. -.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint -When this is set it prevents Linux from being able to rapidly reclaim all the -memory held by the kmem caches. -This may be useful in circumstances where it's preferable that Linux -reclaim memory from some other subsystem first. -Setting this will increase the likelihood out of memory events on a memory -constrained system. -. -.It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint -The preferred number of objects per slab in the cache. -In general, a larger value will increase the caches memory footprint -while decreasing the time required to perform an allocation. -Conversely, a smaller value will minimize the footprint -and improve cache reclaim time but individual allocations may take longer. -. -.It Sy spl_kmem_cache_max_size Ns = Ns Sy 32 Po 64-bit Pc or Sy 4 Po 32-bit Pc Pq uint -The maximum size of a kmem cache slab in MiB. -This effectively limits the maximum cache object size to -.Sy spl_kmem_cache_max_size Ns / Ns Sy spl_kmem_cache_obj_per_slab . -.Pp -Caches may not be created with -object sized larger than this limit. -. -.It Sy spl_kmem_cache_slab_limit Ns = Ns Sy 16384 Pq uint -For small objects the Linux slab allocator should be used to make the most -efficient use of the memory. -However, large objects are not supported by -the Linux slab and therefore the SPL implementation is preferred. -This value is used to determine the cutoff between a small and large object. -.Pp -Objects of size -.Sy spl_kmem_cache_slab_limit -or smaller will be allocated using the Linux slab allocator, -large objects use the SPL allocator. -A cutoff of 16K was determined to be optimal for architectures using 4K pages. -. -.It Sy spl_kmem_alloc_warn Ns = Ns Sy 32768 Pq uint -As a general rule -.Fn kmem_alloc -allocations should be small, -preferably just a few pages, since they must by physically contiguous. 
-Therefore, a rate limited warning will be printed to the console for any -.Fn kmem_alloc -which exceeds a reasonable threshold. -.Pp -The default warning threshold is set to eight pages but capped at 32K to -accommodate systems using large pages. -This value was selected to be small enough to ensure -the largest allocations are quickly noticed and fixed. -But large enough to avoid logging any warnings when a allocation size is -larger than optimal but not a serious concern. -Since this value is tunable, developers are encouraged to set it lower -when testing so any new largish allocations are quickly caught. -These warnings may be disabled by setting the threshold to zero. -. -.It Sy spl_kmem_alloc_max Ns = Ns Sy KMALLOC_MAX_SIZE Ns / Ns Sy 4 Pq uint -Large -.Fn kmem_alloc -allocations will fail if they exceed -.Sy KMALLOC_MAX_SIZE . -Allocations which are marginally smaller than this limit may succeed but -should still be avoided due to the expense of locating a contiguous range -of free pages. -Therefore, a maximum kmem size with reasonable safely margin of 4x is set. -.Fn kmem_alloc -allocations larger than this maximum will quickly fail. -.Fn vmem_alloc -allocations less than or equal to this value will use -.Fn kmalloc , -but shift to -.Fn vmalloc -when exceeding this value. -. -.It Sy spl_kmem_cache_magazine_size Ns = Ns Sy 0 Pq uint -Cache magazines are an optimization designed to minimize the cost of -allocating memory. -They do this by keeping a per-cpu cache of recently -freed objects, which can then be reallocated without taking a lock. -This can improve performance on highly contended caches. -However, because objects in magazines will prevent otherwise empty slabs -from being immediately released this may not be ideal for low memory machines. -.Pp -For this reason, -.Sy spl_kmem_cache_magazine_size -can be used to set a maximum magazine size. -When this value is set to 0 the magazine size will -be automatically determined based on the object size. -Otherwise magazines will be limited to 2-256 objects per magazine (i.e per cpu). -Magazines may never be entirely disabled in this implementation. -. -.It Sy spl_hostid Ns = Ns Sy 0 Pq ulong -The system hostid, when set this can be used to uniquely identify a system. -By default this value is set to zero which indicates the hostid is disabled. -It can be explicitly enabled by placing a unique non-zero value in -.Pa /etc/hostid . -. -.It Sy spl_hostid_path Ns = Ns Pa /etc/hostid Pq charp -The expected path to locate the system hostid when specified. -This value may be overridden for non-standard configurations. -. -.It Sy spl_panic_halt Ns = Ns Sy 0 Pq uint -Cause a kernel panic on assertion failures. -When not enabled, the thread is halted to facilitate further debugging. -.Pp -Set to a non-zero value to enable. -. -.It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint -Kick stuck taskq to spawn threads. -When writing a non-zero value to it, it will scan all the taskqs. -If any of them have a pending task more than 5 seconds old, -it will kick it to spawn more threads. -This can be used if you find a rare -deadlock occurs because one or more taskqs didn't spawn a thread when it should. -. -.It Sy spl_taskq_thread_bind Ns = Ns Sy 0 Pq int -Bind taskq threads to specific CPUs. -When enabled all taskq threads will be distributed evenly -across the available CPUs. -By default, this behavior is disabled to allow the Linux scheduler -the maximum flexibility to determine where a thread should run. -. 
-.It Sy spl_taskq_thread_dynamic Ns = Ns Sy 1 Pq int -Allow dynamic taskqs. -When enabled taskqs which set the -.Sy TASKQ_DYNAMIC -flag will by default create only a single thread. -New threads will be created on demand up to a maximum allowed number -to facilitate the completion of outstanding tasks. -Threads which are no longer needed will be promptly destroyed. -By default this behavior is enabled but it can be disabled to -aid performance analysis or troubleshooting. -. -.It Sy spl_taskq_thread_priority Ns = Ns Sy 1 Pq int -Allow newly created taskq threads to set a non-default scheduler priority. -When enabled, the priority specified when a taskq is created will be applied -to all threads created by that taskq. -When disabled all threads will use the default Linux kernel thread priority. -By default, this behavior is enabled. -. -.It Sy spl_taskq_thread_sequential Ns = Ns Sy 4 Pq int -The number of items a taskq worker thread must handle without interruption -before requesting a new worker thread be spawned. -This is used to control -how quickly taskqs ramp up the number of threads processing the queue. -Because Linux thread creation and destruction are relatively inexpensive a -small default value has been selected. -This means that normally threads will be created aggressively which is desirable. -Increasing this value will -result in a slower thread creation rate which may be preferable for some -configurations. -. -.It Sy spl_max_show_tasks Ns = Ns Sy 512 Pq uint -The maximum number of tasks per pending list in each taskq shown in -.Pa /proc/spl/taskq{,-all} . -Write -.Sy 0 -to turn off the limit. -The proc file will walk the lists with lock held, -reading it could cause a lock-up if the list grow too large -without limiting the output. -"(truncated)" will be shown if the list is larger than the limit. -. -.El diff --git a/man/man5/zfs-events.5 b/man/man5/zfs-events.5 deleted file mode 100644 index 846a7080d01c..000000000000 --- a/man/man5/zfs-events.5 +++ /dev/null @@ -1,448 +0,0 @@ -.\" -.\" Copyright (c) 2013 by Turbo Fredriksson . All rights reserved. -.\" Portions Copyright 2018 by Richard Elling -.\" The contents of this file are subject to the terms of the Common Development -.\" and Distribution License (the "License"). You may not use this file except -.\" in compliance with the License. You can obtain a copy of the license at -.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" -.\" See the License for the specific language governing permissions and -.\" limitations under the License. When distributing Covered Code, include this -.\" CDDL HEADER in each file and include the License file at -.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this -.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your -.\" own identifying information: -.\" Portions Copyright [yyyy] [name of copyright owner] -.\" -.Dd May 26, 2021 -.Dt ZFS-EVENTS 5 -.Os -. -.Sh NAME -.Nm zfs-events -.Nd Events created by the ZFS filesystem -.Sh DESCRIPTION -Description of the different events generated by the ZFS stack. -.Pp -Most of these don't have any description. -The events generated by ZFS have never been publicly documented. -What is here is intended as a starting point to provide documentation -for all possible events. 
-.Pp -To view all events created since the loading of the ZFS infrastructure -(i.e, "the module"), run -.Dl Nm zpool Cm events -to get a short list, and -.Dl Nm zpool Cm events Fl v -to get a full detail of the events and what information -is available about it. -.Pp -This manual page lists the different subclasses that are issued -in the case of an event. -The full event name would be -.Sy ereport.fs.zfs.\& Ns Em SUBCLASS , -but we only list the last part here. -. -.Sh EVENTS (SUBCLASS) -.Bl -tag -compact -width "vdev.bad_guid_sum" -.It Sy checksum -Issued when a checksum error has been detected. -.It Sy io -Issued when there is an I/O error in a vdev in the pool. -.It Sy data -Issued when there have been data errors in the pool. -.It Sy deadman -Issued when an I/O request is determined to be "hung", this can be caused -by lost completion events due to flaky hardware or drivers. -See -.Sy zfs_deadman_failmode -in -.Xr zfs-module-parameters 5 -for additional information regarding "hung" I/O detection and configuration. -.It Sy delay -Issued when a completed I/O request exceeds the maximum allowed time -specified by the -.Sy zio_slow_io_ms -module parameter. -This can be an indicator of problems with the underlying storage device. -The number of delay events is ratelimited by the -.Sy zfs_slow_io_events_per_second -module parameter. -.It Sy config -Issued every time a vdev change have been done to the pool. -.It Sy zpool -Issued when a pool cannot be imported. -.It Sy zpool.destroy -Issued when a pool is destroyed. -.It Sy zpool.export -Issued when a pool is exported. -.It Sy zpool.import -Issued when a pool is imported. -.It Sy zpool.reguid -Issued when a REGUID (new unique identifier for the pool have been regenerated) have been detected. -.It Sy vdev.unknown -Issued when the vdev is unknown. -Such as trying to clear device errors on a vdev that have failed/been kicked -from the system/pool and is no longer available. -.It Sy vdev.open_failed -Issued when a vdev could not be opened (because it didn't exist for example). -.It Sy vdev.corrupt_data -Issued when corrupt data have been detected on a vdev. -.It Sy vdev.no_replicas -Issued when there are no more replicas to sustain the pool. -This would lead to the pool being -.Em DEGRADED . -.It Sy vdev.bad_guid_sum -Issued when a missing device in the pool have been detected. -.It Sy vdev.too_small -Issued when the system (kernel) have removed a device, and ZFS -notices that the device isn't there any more. -This is usually followed by a -.Sy probe_failure -event. -.It Sy vdev.bad_label -Issued when the label is OK but invalid. -.It Sy vdev.bad_ashift -Issued when the ashift alignment requirement has increased. -.It Sy vdev.remove -Issued when a vdev is detached from a mirror (or a spare detached from a -vdev where it have been used to replace a failed drive - only works if -the original drive have been readded). -.It Sy vdev.clear -Issued when clearing device errors in a pool. -Such as running -.Nm zpool Cm clear -on a device in the pool. -.It Sy vdev.check -Issued when a check to see if a given vdev could be opened is started. -.It Sy vdev.spare -Issued when a spare have kicked in to replace a failed device. -.It Sy vdev.autoexpand -Issued when a vdev can be automatically expanded. -.It Sy io_failure -Issued when there is an I/O failure in a vdev in the pool. -.It Sy probe_failure -Issued when a probe fails on a vdev. -This would occur if a vdev -have been kicked from the system outside of ZFS (such as the kernel -have removed the device). 
-.It Sy log_replay -Issued when the intent log cannot be replayed. -The can occur in the case of a missing or damaged log device. -.It Sy resilver.start -Issued when a resilver is started. -.It Sy resilver.finish -Issued when the running resilver have finished. -.It Sy scrub.start -Issued when a scrub is started on a pool. -.It Sy scrub.finish -Issued when a pool has finished scrubbing. -.It Sy scrub.abort -Issued when a scrub is aborted on a pool. -.It Sy scrub.resume -Issued when a scrub is resumed on a pool. -.It Sy scrub.paused -Issued when a scrub is paused on a pool. -.It Sy bootfs.vdev.attach -.El -. -.Sh PAYLOADS -This is the payload (data, information) that accompanies an -event. -.Pp -For -.Xr zed 8 , -these are set to uppercase and prefixed with -.Sy ZEVENT_ . -.Bl -tag -compact -width "vdev_cksum_errors" -.It Sy pool -Pool name. -.It Sy pool_failmode -Failmode - -.Sy wait , -.Sy continue , -or -.Sy panic . -See the -.Sy failmode -property in -.Xr zpoolprops 8 -for more information. -.It Sy pool_guid -The GUID of the pool. -.It Sy pool_context -The load state for the pool (0=none, 1=open, 2=import, 3=tryimport, 4=recover -5=error). -.It Sy vdev_guid -The GUID of the vdev in question (the vdev failing or operated upon with -.Nm zpool Cm clear , -etc.). -.It Sy vdev_type -Type of vdev - -.Sy disk , -.Sy file , -.Sy mirror , -etc. -See the -.Sy Virtual Devices -section of -.Xr zpoolconcepts 8 -for more information on possible values. -.It Sy vdev_path -Full path of the vdev, including any -.Em -partX . -.It Sy vdev_devid -ID of vdev (if any). -.It Sy vdev_fru -Physical FRU location. -.It Sy vdev_state -State of vdev (0=uninitialized, 1=closed, 2=offline, 3=removed, 4=failed to open, 5=faulted, 6=degraded, 7=healthy). -.It Sy vdev_ashift -The ashift value of the vdev. -.It Sy vdev_complete_ts -The time the last I/O request completed for the specified vdev. -.It Sy vdev_delta_ts -The time since the last I/O request completed for the specified vdev. -.It Sy vdev_spare_paths -List of spares, including full path and any -.Em -partX . -.It Sy vdev_spare_guids -GUID(s) of spares. -.It Sy vdev_read_errors -How many read errors that have been detected on the vdev. -.It Sy vdev_write_errors -How many write errors that have been detected on the vdev. -.It Sy vdev_cksum_errors -How many checksum errors that have been detected on the vdev. -.It Sy parent_guid -GUID of the vdev parent. -.It Sy parent_type -Type of parent. -See -.Sy vdev_type . -.It Sy parent_path -Path of the vdev parent (if any). -.It Sy parent_devid -ID of the vdev parent (if any). -.It Sy zio_objset -The object set number for a given I/O request. -.It Sy zio_object -The object number for a given I/O request. -.It Sy zio_level -The indirect level for the block. -Level 0 is the lowest level and includes data blocks. -Values > 0 indicate metadata blocks at the appropriate level. -.It Sy zio_blkid -The block ID for a given I/O request. -.It Sy zio_err -The error number for a failure when handling a given I/O request, -compatible with -.Xr errno 3 -with the value of -.Sy EBADE -used to indicate a ZFS checksum error. -.It Sy zio_offset -The offset in bytes of where to write the I/O request for the specified vdev. -.It Sy zio_size -The size in bytes of the I/O request. -.It Sy zio_flags -The current flags describing how the I/O request should be handled. -See the -.Sy I/O FLAGS -section for the full list of I/O flags. -.It Sy zio_stage -The current stage of the I/O in the pipeline. 
-See the -.Sy I/O STAGES -section for a full list of all the I/O stages. -.It Sy zio_pipeline -The valid pipeline stages for the I/O. -See the -.Sy I/O STAGES -section for a full list of all the I/O stages. -.It Sy zio_delay -The time elapsed (in nanoseconds) waiting for the block layer to complete the -I/O request. -Unlike -.Sy zio_delta , -this does not include any vdev queuing time and is -therefore solely a measure of the block layer performance. -.It Sy zio_timestamp -The time when a given I/O request was submitted. -.It Sy zio_delta -The time required to service a given I/O request. -.It Sy prev_state -The previous state of the vdev. -.It Sy cksum_expected -The expected checksum value for the block. -.It Sy cksum_actual -The actual checksum value for an errant block. -.It Sy cksum_algorithm -Checksum algorithm used. -See -.Xr zfsprops 8 -for more information on the available checksum algorithms. -.It Sy cksum_byteswap -Whether or not the data is byteswapped. -.It Sy bad_ranges -.No [\& Ns Ar start , end ) -pairs of corruption offsets. -Offsets are always aligned on a 64-bit boundary, -and can include some gaps of non-corruption. -(See -.Sy bad_ranges_min_gap ) -.It Sy bad_ranges_min_gap -In order to bound the size of the -.Sy bad_ranges -array, gaps of non-corruption -less than or equal to -.Sy bad_ranges_min_gap -bytes have been merged with -adjacent corruption. -Always at least 8 bytes, since corruption is detected on a 64-bit word basis. -.It Sy bad_range_sets -This array has one element per range in -.Sy bad_ranges . -Each element contains -the count of bits in that range which were clear in the good data and set -in the bad data. -.It Sy bad_range_clears -This array has one element per range in -.Sy bad_ranges . -Each element contains -the count of bits for that range which were set in the good data and clear in -the bad data. -.It Sy bad_set_bits -If this field exists, it is an array of -.Pq Ar bad data No & ~( Ns Ar good data ) ; -that is, the bits set in the bad data which are cleared in the good data. -Each element corresponds a byte whose offset is in a range in -.Sy bad_ranges , -and the array is ordered by offset. -Thus, the first element is the first byte in the first -.Sy bad_ranges -range, and the last element is the last byte in the last -.Sy bad_ranges -range. -.It Sy bad_cleared_bits -Like -.Sy bad_set_bits , -but contains -.Pq Ar good data No & ~( Ns Ar bad data ) ; -that is, the bits set in the good data which are cleared in the bad data. -.It Sy bad_set_histogram -If this field exists, it is an array of counters. -Each entry counts bits set in a particular bit of a big-endian uint64 type. -The first entry counts bits -set in the high-order bit of the first byte, the 9th byte, etc, and the last -entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc. -This information is useful for observing a stuck bit in a parallel data path, -such as IDE or parallel SCSI. -.It Sy bad_cleared_histogram -If this field exists, it is an array of counters. -Each entry counts bit clears in a particular bit of a big-endian uint64 type. -The first entry counts bits -clears of the high-order bit of the first byte, the 9th byte, etc, and the -last entry counts clears of the low-order bit of the 8th byte, the 16th byte, etc. -This information is useful for observing a stuck bit in a parallel data -path, such as IDE or parallel SCSI. -.El -. -.Sh I/O STAGES -The ZFS I/O pipeline is comprised of various stages which are defined below. 
-The individual stages are used to construct these basic I/O -operations: Read, Write, Free, Claim, and Ioctl. -These stages may be -set on an event to describe the life cycle of a given I/O request. -.Pp -.TS -tab(:); -l l l . -Stage:Bit Mask:Operations -_:_:_ -ZIO_STAGE_OPEN:0x00000001:RWFCI - -ZIO_STAGE_READ_BP_INIT:0x00000002:R---- -ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W--- -ZIO_STAGE_FREE_BP_INIT:0x00000008:--F-- -ZIO_STAGE_ISSUE_ASYNC:0x00000010:RWF-- -ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W--- - -ZIO_STAGE_ENCRYPT:0x00000040:-W--- -ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W--- - -ZIO_STAGE_NOP_WRITE:0x00000100:-W--- - -ZIO_STAGE_DDT_READ_START:0x00000200:R---- -ZIO_STAGE_DDT_READ_DONE:0x00000400:R---- -ZIO_STAGE_DDT_WRITE:0x00000800:-W--- -ZIO_STAGE_DDT_FREE:0x00001000:--F-- - -ZIO_STAGE_GANG_ASSEMBLE:0x00002000:RWFC- -ZIO_STAGE_GANG_ISSUE:0x00004000:RWFC- - -ZIO_STAGE_DVA_THROTTLE:0x00008000:-W--- -ZIO_STAGE_DVA_ALLOCATE:0x00010000:-W--- -ZIO_STAGE_DVA_FREE:0x00020000:--F-- -ZIO_STAGE_DVA_CLAIM:0x00040000:---C- - -ZIO_STAGE_READY:0x00080000:RWFCI - -ZIO_STAGE_VDEV_IO_START:0x00100000:RW--I -ZIO_STAGE_VDEV_IO_DONE:0x00200000:RW--I -ZIO_STAGE_VDEV_IO_ASSESS:0x00400000:RW--I - -ZIO_STAGE_CHECKSUM_VERIFY:0x00800000:R---- - -ZIO_STAGE_DONE:0x01000000:RWFCI -.TE -. -.Sh I/O FLAGS -Every I/O request in the pipeline contains a set of flags which describe its -function and are used to govern its behavior. -These flags will be set in an event as a -.Sy zio_flags -payload entry. -.Pp -.TS -tab(:); -l l . -Flag:Bit Mask -_:_ -ZIO_FLAG_DONT_AGGREGATE:0x00000001 -ZIO_FLAG_IO_REPAIR:0x00000002 -ZIO_FLAG_SELF_HEAL:0x00000004 -ZIO_FLAG_RESILVER:0x00000008 -ZIO_FLAG_SCRUB:0x00000010 -ZIO_FLAG_SCAN_THREAD:0x00000020 -ZIO_FLAG_PHYSICAL:0x00000040 - -ZIO_FLAG_CANFAIL:0x00000080 -ZIO_FLAG_SPECULATIVE:0x00000100 -ZIO_FLAG_CONFIG_WRITER:0x00000200 -ZIO_FLAG_DONT_RETRY:0x00000400 -ZIO_FLAG_DONT_CACHE:0x00000800 -ZIO_FLAG_NODATA:0x00001000 -ZIO_FLAG_INDUCE_DAMAGE:0x00002000 - -ZIO_FLAG_IO_ALLOCATING:0x00004000 -ZIO_FLAG_IO_RETRY:0x00008000 -ZIO_FLAG_PROBE:0x00010000 -ZIO_FLAG_TRYHARD:0x00020000 -ZIO_FLAG_OPTIONAL:0x00040000 - -ZIO_FLAG_DONT_QUEUE:0x00080000 -ZIO_FLAG_DONT_PROPAGATE:0x00100000 -ZIO_FLAG_IO_BYPASS:0x00200000 -ZIO_FLAG_IO_REWRITE:0x00400000 -ZIO_FLAG_RAW_COMPRESS:0x00800000 -ZIO_FLAG_RAW_ENCRYPT:0x01000000 - -ZIO_FLAG_GANG_CHILD:0x02000000 -ZIO_FLAG_DDT_CHILD:0x04000000 -ZIO_FLAG_GODFATHER:0x08000000 -ZIO_FLAG_NOPWRITE:0x10000000 -ZIO_FLAG_REEXECUTED:0x20000000 -ZIO_FLAG_DELEGATED:0x40000000 -ZIO_FLAG_FASTWRITE:0x80000000 -.TE diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 deleted file mode 100644 index 6dbf2749f3b7..000000000000 --- a/man/man5/zfs-module-parameters.5 +++ /dev/null @@ -1,2379 +0,0 @@ -.\" -.\" Copyright (c) 2013 by Turbo Fredriksson . All rights reserved. -.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved. -.\" Copyright (c) 2019 Datto Inc. -.\" The contents of this file are subject to the terms of the Common Development -.\" and Distribution License (the "License"). You may not use this file except -.\" in compliance with the License. You can obtain a copy of the license at -.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" -.\" See the License for the specific language governing permissions and -.\" limitations under the License. When distributing Covered Code, include this -.\" CDDL HEADER in each file and include the License file at -.\" usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this -.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your -.\" own identifying information: -.\" Portions Copyright [yyyy] [name of copyright owner] -.\" -.Dd June 1, 2021 -.Dt ZFS-MODULE-PARAMETERS 5 -.Os -. -.Sh NAME -.Nm zfs-module-parameters -.Nd parameters of the ZFS kernel module -. -.Sh DESCRIPTION -.Bl -tag -width Ds -.It Sy dbuf_cache_max_bytes Ns = Ns Sy ULONG_MAX Ns B Pq ulong -Maximum size in bytes of the dbuf cache. -The target size is determined by the MIN versus -.No 1/2^ Ns Sy dbuf_cache_shift Pq 1/32nd -of the target ARC size. -The behavior of the dbuf cache and its associated settings -can be observed via the -.Pa /proc/spl/kstat/zfs/dbufstats -kstat. -. -.It Sy dbuf_metadata_cache_max_bytes Ns = Ns Sy ULONG_MAX Ns B Pq ulong -Maximum size in bytes of the metadata dbuf cache. -The target size is determined by the MIN versus -.No 1/2^ Ns Sy dbuf_metadata_cache_shift Pq 1/64th -of the target ARC size. -The behavior of the metadata dbuf cache and its associated settings -can be observed via the -.Pa /proc/spl/kstat/zfs/dbufstats -kstat. -. -.It Sy dbuf_cache_hiwater_pct Ns = Ns Sy 10 Ns % Pq uint -The percentage over -.Sy dbuf_cache_max_bytes -when dbufs must be evicted directly. -. -.It Sy dbuf_cache_lowater_pct Ns = Ns Sy 10 Ns % Pq uint -The percentage below -.Sy dbuf_cache_max_bytes -when the evict thread stops evicting dbufs. -. -.It Sy dbuf_cache_shift Ns = Ns Sy 5 Pq int -Set the size of the dbuf cache -.Pq Sy dbuf_cache_max_bytes -to a log2 fraction of the target ARC size. -. -.It Sy dbuf_metadata_cache_shift Ns = Ns Sy 6 Pq int -Set the size of the dbuf metadata cache -.Pq Sy dbuf_metadata_cache_max_bytes -to a log2 fraction of the target ARC size. -. -.It Sy dmu_object_alloc_chunk_shift Ns = Ns Sy 7 Po 128 Pc Pq int -dnode slots allocated in a single operation as a power of 2. -The default value minimizes lock contention for the bulk operation performed. -. -.It Sy dmu_prefetch_max Ns = Ns Sy 134217728 Ns B Po 128MB Pc Pq int -Limit the amount we can prefetch with one call to this amount in bytes. -This helps to limit the amount of memory that can be used by prefetching. -. -.It Sy ignore_hole_birth Pq int -Alias for -.Sy send_holes_without_birth_time . -. -.It Sy l2arc_feed_again Ns = Ns Sy 1 Ns | Ns 0 Pq int -Turbo L2ARC warm-up. -When the L2ARC is cold the fill interval will be set as fast as possible. -. -.It Sy l2arc_feed_min_ms Ns = Ns Sy 200 Pq ulong -Min feed interval in milliseconds. -Requires -.Sy l2arc_feed_again Ns = Ns Ar 1 -and only applicable in related situations. -. -.It Sy l2arc_feed_secs Ns = Ns Sy 1 Pq ulong -Seconds between L2ARC writing. -. -.It Sy l2arc_headroom Ns = Ns Sy 2 Pq ulong -How far through the ARC lists to search for L2ARC cacheable content, -expressed as a multiplier of -.Sy l2arc_write_max . -ARC persistence across reboots can be achieved with persistent L2ARC -by setting this parameter to -.Sy 0 , -allowing the full length of ARC lists to be searched for cacheable content. -. -.It Sy l2arc_headroom_boost Ns = Ns Sy 200 Ns % Pq ulong -Scales -.Sy l2arc_headroom -by this percentage when L2ARC contents are being successfully compressed -before writing. -A value of -.Sy 100 -disables this feature. -. -.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int -Controls whether only MFU metadata and data are cached from ARC into L2ARC. 
-This may be desired to avoid wasting space on L2ARC when reading/writing large -amounts of data that are not expected to be accessed more than once. -.Pp -The default is off, -meaning both MRU and MFU data and metadata are cached. -When turning off this feature, some MRU buffers will still be present -in ARC and eventually cached on L2ARC. -.No If Sy l2arc_noprefetch Ns = Ns Sy 0 , -some prefetched buffers will be cached to L2ARC, and those might later -transition to MRU, in which case the -.Sy l2arc_mru_asize No arcstat will not be Sy 0 . -.Pp -Regardless of -.Sy l2arc_noprefetch , -some MFU buffers might be evicted from ARC, -accessed later on as prefetches and transition to MRU as prefetches. -If accessed again they are counted as MRU and the -.Sy l2arc_mru_asize No arcstat will not be Sy 0 . -.Pp -The ARC status of L2ARC buffers when they were first cached in -L2ARC can be seen in the -.Sy l2arc_mru_asize , Sy l2arc_mfu_asize , No and Sy l2arc_prefetch_asize -arcstats when importing the pool or onlining a cache -device if persistent L2ARC is enabled. -.Pp -The -.Sy evict_l2_eligible_mru -arcstat does not take into account if this option is enabled as the information -provided by the -.Sy evict_l2_eligible_m[rf]u -arcstats can be used to decide if toggling this option is appropriate -for the current workload. -. -.It Sy l2arc_meta_percent Ns = Ns Sy 33 Ns % Pq int -Percent of ARC size allowed for L2ARC-only headers. -Since L2ARC buffers are not evicted on memory pressure, -too many headers on a system with an irrationally large L2ARC -can render it slow or unusable. -This parameter limits L2ARC writes and rebuilds to achieve the target. -. -.It Sy l2arc_trim_ahead Ns = Ns Sy 0 Ns % Pq ulong -Trims ahead of the current write size -.Pq Sy l2arc_write_max -on L2ARC devices by this percentage of write size if we have filled the device. -If set to -.Sy 100 -we TRIM twice the space required to accommodate upcoming writes. -A minimum of -.Sy 64MB -will be trimmed. -It also enables TRIM of the whole L2ARC device upon creation -or addition to an existing pool or if the header of the device is -invalid upon importing a pool or onlining a cache device. -A value of -.Sy 0 -disables TRIM on L2ARC altogether and is the default as it can put significant -stress on the underlying storage devices. -This will vary depending of how well the specific device handles these commands. -. -.It Sy l2arc_noprefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int -Do not write buffers to L2ARC if they were prefetched but not used by -applications. -In case there are prefetched buffers in L2ARC and this option -is later set, we do not read the prefetched buffers from L2ARC. -Unsetting this option is useful for caching sequential reads from the -disks to L2ARC and serve those reads from L2ARC later on. -This may be beneficial in case the L2ARC device is significantly faster -in sequential reads than the disks of the pool. -.Pp -Use -.Sy 1 -to disable and -.Sy 0 -to enable caching/reading prefetches to/from L2ARC. -. -.It Sy l2arc_norw Ns = Ns Sy 0 Ns | Ns 1 Pq int -No reads during writes. -. -.It Sy l2arc_write_boost Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq ulong -Cold L2ARC devices will have -.Sy l2arc_write_max -increased by this amount while they remain cold. -. -.It Sy l2arc_write_max Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq ulong -Max write bytes per interval. -. -.It Sy l2arc_rebuild_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Rebuild the L2ARC when importing a pool (persistent L2ARC). 
-This can be disabled if there are problems importing a pool -or attaching an L2ARC device (e.g. the L2ARC device is slow -in reading stored log metadata, or the metadata -has become somehow fragmented/unusable). -. -.It Sy l2arc_rebuild_blocks_min_l2size Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong -Mininum size of an L2ARC device required in order to write log blocks in it. -The log blocks are used upon importing the pool to rebuild the persistent L2ARC. -.Pp -For L2ARC devices less than 1GB, the amount of data -.Fn l2arc_evict -evicts is significant compared to the amount of restored L2ARC data. -In this case, do not write log blocks in L2ARC in order not to waste space. -. -.It Sy metaslab_aliquot Ns = Ns Sy 524288 Ns B Po 512kB Pc Pq ulong -Metaslab granularity, in bytes. -This is roughly similar to what would be referred to as the "stripe size" -in traditional RAID arrays. -In normal operation, ZFS will try to write this amount of data -to a top-level vdev before moving on to the next one. -. -.It Sy metaslab_bias_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable metaslab group biasing based on their vdevs' over- or under-utilization -relative to the pool. -. -.It Sy metaslab_force_ganging Ns = Ns Sy 16777217 Ns B Ns B Po 16MB + 1B Pc Pq ulong -Make some blocks above a certain size be gang blocks. -This option is used by the test suite to facilitate testing. -. -.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Ns B Po 1MB Pc Pq int -When attempting to log an output nvlist of an ioctl in the on-disk history, -the output will not be stored if it is larger than this size (in bytes). -This must be less than -.Sy DMU_MAX_ACCESS Pq 64MB . -This applies primarily to -.Fn zfs_ioc_channel_program Pq cf. Xr zfs-program 8 . -. -.It Sy zfs_keep_log_spacemaps_at_export Ns = Ns Sy 0 Ns | Ns 1 Pq int -Prevent log spacemaps from being destroyed during pool exports and destroys. -. -.It Sy zfs_metaslab_segment_weight_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable/disable segment-based metaslab selection. -. -.It Sy zfs_metaslab_switch_threshold Ns = Ns Sy 2 Pq int -When using segment-based metaslab selection, continue allocating -from the active metaslab until this option's -worth of buckets have been exhausted. -. -.It Sy metaslab_debug_load Ns = Ns Sy 0 Ns | Ns 1 Pq int -Load all metaslabs during pool import. -. -.It Sy metaslab_debug_unload Ns = Ns Sy 0 Ns | Ns 1 Pq int -Prevent metaslabs from being unloaded. -. -.It Sy metaslab_fragmentation_factor_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable use of the fragmentation metric in computing metaslab weights. -. -.It Sy metaslab_df_max_search Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int -Maximum distance to search forward from the last offset. -Without this limit, fragmented pools can see -.Em >100`000 -iterations and -.Fn metaslab_block_picker -becomes the performance limiting factor on high-performance storage. -.Pp -With the default setting of -.Sy 16MB , -we typically see less than -.Em 500 -iterations, even with very fragmented -.Sy ashift Ns = Ns Sy 9 -pools. -The maximum number of iterations possible is -.Sy metaslab_df_max_search / 2^(ashift+1) . -With the default setting of -.Sy 16MB -this is -.Em 16*1024 Pq with Sy ashift Ns = Ns Sy 9 -or -.Em 2*1024 Pq with Sy ashift Ns = Ns Sy 12 . -. -.It Sy metaslab_df_use_largest_segment Ns = Ns Sy 0 Ns | Ns 1 Pq int -If not searching forward (due to -.Sy metaslab_df_max_search , metaslab_df_free_pct , -.No or Sy metaslab_df_alloc_threshold ) , -this tunable controls which segment is used. 
-If set, we will use the largest free segment. -If unset, we will use a segment of at least the requested size. -. -.It Sy zfs_metaslab_max_size_cache_sec Ns = Ns Sy 3600 Ns s Po 1h Pc Pq ulong -When we unload a metaslab, we cache the size of the largest free chunk. -We use that cached size to determine whether or not to load a metaslab -for a given allocation. -As more frees accumulate in that metaslab while it's unloaded, -the cached max size becomes less and less accurate. -After a number of seconds controlled by this tunable, -we stop considering the cached max size and start -considering only the histogram instead. -. -.It Sy zfs_metaslab_mem_limit Ns = Ns Sy 25 Ns % Pq int -When we are loading a new metaslab, we check the amount of memory being used -to store metaslab range trees. -If it is over a threshold, we attempt to unload the least recently used metaslab -to prevent the system from clogging all of its memory with range trees. -This tunable sets the percentage of total system memory that is the threshold. -. -.It Sy zfs_metaslab_try_hard_before_gang Ns = Ns Sy 0 Ns | Ns 1 Pq int -.Bl -item -compact -.It -If unset, we will first try normal allocation. -.It -If that fails then we will do a gang allocation. -.It -If that fails then we will do a "try hard" gang allocation. -.It -If that fails then we will have a multi-layer gang block. -.El -.Pp -.Bl -item -compact -.It -If set, we will first try normal allocation. -.It -If that fails then we will do a "try hard" allocation. -.It -If that fails we will do a gang allocation. -.It -If that fails we will do a "try hard" gang allocation. -.It -If that fails then we will have a multi-layer gang block. -.El -. -.It Sy zfs_metaslab_find_max_tries Ns = Ns Sy 100 Pq int -When not trying hard, we only consider this number of the best metaslabs. -This improves performance, especially when there are many metaslabs per vdev -and the allocation can't actually be satisfied -(so we would otherwise iterate all metaslabs). -. -.It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq int -When a vdev is added, target this number of metaslabs per top-level vdev. -. -.It Sy zfs_vdev_default_ms_shift Ns = Ns Sy 29 Po 512MB Pc Pq int -Default limit for metaslab size. -. -.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy ASHIFT_MAX Po 16 Pc Pq ulong -Maximum ashift used when optimizing for logical -> physical sector size on new -top-level vdevs. -. -.It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq ulong -Minimum ashift used when creating new top-level vdevs. -. -.It Sy zfs_vdev_min_ms_count Ns = Ns Sy 16 Pq int -Minimum number of metaslabs to create in a top-level vdev. -. -.It Sy vdev_validate_skip Ns = Ns Sy 0 Ns | Ns 1 Pq int -Skip label validation steps during pool import. -Changing is not recommended unless you know what you're doing -and are recovering a damaged label. -. -.It Sy zfs_vdev_ms_count_limit Ns = Ns Sy 131072 Po 128k Pc Pq int -Practical upper limit of total metaslabs per top-level vdev. -. -.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable metaslab group preloading. -. -.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Give more weight to metaslabs with lower LBAs, -assuming they have greater bandwidth, -as is typically the case on a modern constant angular velocity disk drive. -. -.It Sy metaslab_unload_delay Ns = Ns Sy 32 Pq int -After a metaslab is used, we keep it loaded for this many TXGs, to attempt to -reduce unnecessary reloading. 
-Note that both this many TXGs and -.Sy metaslab_unload_delay_ms -milliseconds must pass before unloading will occur. -. -.It Sy metaslab_unload_delay_ms Ns = Ns Sy 600000 Ns ms Po 10min Pc Pq int -After a metaslab is used, we keep it loaded for this many milliseconds, -to attempt to reduce unnecessary reloading. -Note, that both this many milliseconds and -.Sy metaslab_unload_delay -TXGs must pass before unloading will occur. -. -.It Sy reference_history Ns = Ns Sy 3 Pq int -Maximum reference holders being tracked when reference_tracking_enable is active. -. -.It Sy reference_tracking_enable Ns = Ns Sy 0 Ns | Ns 1 Pq int -Track reference holders to -.Sy refcount_t -objects (debug builds only). -. -.It Sy send_holes_without_birth_time Ns = Ns Sy 1 Ns | Ns 0 Pq int -When set, the -.Sy hole_birth -optimization will not be used, and all holes will always be sent during a -.Nm zfs Cm send . -This is useful if you suspect your datasets are affected by a bug in -.Sy hole_birth . -. -.It Sy spa_config_path Ns = Ns Pa /etc/zfs/zpool.cache Pq charp -SPA config file. -. -.It Sy spa_asize_inflation Ns = Ns Sy 24 Pq int -Multiplication factor used to estimate actual disk consumption from the -size of data being written. -The default value is a worst case estimate, -but lower values may be valid for a given pool depending on its configuration. -Pool administrators who understand the factors involved -may wish to specify a more realistic inflation factor, -particularly if they operate close to quota or capacity limits. -. -.It Sy spa_load_print_vdev_tree Ns = Ns Sy 0 Ns | Ns 1 Pq int -Whether to print the vdev tree in the debugging message buffer during pool import. -. -.It Sy spa_load_verify_data Ns = Ns Sy 1 Ns | Ns 0 Pq int -Whether to traverse data blocks during an "extreme rewind" -.Pq Fl X -import. -.Pp -An extreme rewind import normally performs a full traversal of all -blocks in the pool for verification. -If this parameter is unset, the traversal skips non-metadata blocks. -It can be toggled once the -import has started to stop or start the traversal of non-metadata blocks. -. -.It Sy spa_load_verify_metadata Ns = Ns Sy 1 Ns | Ns 0 Pq int -Whether to traverse blocks during an "extreme rewind" -.Pq Fl X -pool import. -.Pp -An extreme rewind import normally performs a full traversal of all -blocks in the pool for verification. -If this parameter is unset, the traversal is not performed. -It can be toggled once the import has started to stop or start the traversal. -. -.It Sy spa_load_verify_shift Ns = Ns Sy 4 Po 1/16th Pc Pq int -Sets the maximum number of bytes to consume during pool import to the log2 -fraction of the target ARC size. -. -.It Sy spa_slop_shift Ns = Ns Sy 5 Po 1/32nd Pc Pq int -Normally, we don't allow the last -.Sy 3.2% Pq Sy 1/2^spa_slop_shift -of space in the pool to be consumed. -This ensures that we don't run the pool completely out of space, -due to unaccounted changes (e.g. to the MOS). -It also limits the worst-case time to allocate space. -If we have less than this amount of free space, -most ZPL operations (e.g. write, create) will return -.Sy ENOSPC . -. -.It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int -During top-level vdev removal, chunks of data are copied from the vdev -which may include free space in order to trade bandwidth for IOPS. -This parameter determines the maximum span of free space, in bytes, -which will be included as "unnecessary" data in a chunk of copied data. 
-.Pp
-The default value here was chosen to align with
-.Sy zfs_vdev_read_gap_limit ,
-which is a similar concept when doing
-regular reads (but there's no reason it has to be the same).
-.
-.It Sy vdev_file_logical_ashift Ns = Ns Sy 9 Po 512B Pc Pq ulong
-Logical ashift for file-based devices.
-.
-.It Sy vdev_file_physical_ashift Ns = Ns Sy 9 Po 512B Pc Pq ulong
-Physical ashift for file-based devices.
-.
-.It Sy zap_iterate_prefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int
-If set, when we start iterating over a ZAP object,
-prefetch the entire object (all leaf blocks).
-However, this is limited by
-.Sy dmu_prefetch_max .
-.
-.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
-If prefetching is enabled, disable prefetching for reads larger than this size.
-.
-.It Sy zfetch_max_distance Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq uint
-Max bytes to prefetch per stream.
-.
-.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64MB Pc Pq uint
-Max bytes to prefetch indirects for per stream.
-.
-.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
-Max number of streams per zfetch (prefetch streams per file).
-.
-.It Sy zfetch_min_sec_reap Ns = Ns Sy 2 Pq uint
-Min time before an active prefetch stream can be reclaimed.
-.
-.It Sy zfs_abd_scatter_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Enables the use of scatter/gather lists for ARC data buffers.
-If unset, all allocations are forced to be linear in kernel memory.
-Disabling can improve performance in some code paths
-at the expense of fragmented kernel memory.
-.
-.It Sy zfs_abd_scatter_max_order Ns = Ns Sy MAX_ORDER-1 Pq uint
-Maximum number of consecutive memory pages allocated in a single block for
-scatter/gather lists.
-.Pp
-The value of
-.Sy MAX_ORDER
-depends on kernel configuration.
-.
-.It Sy zfs_abd_scatter_min_size Ns = Ns Sy 1536 Ns B Po 1.5kB Pc Pq uint
-This is the minimum allocation size that will use scatter (page-based) ABDs.
-Smaller allocations will use linear ABDs.
-.
-.It Sy zfs_arc_dnode_limit Ns = Ns Sy 0 Ns B Pq ulong
-When the number of bytes consumed by dnodes in the ARC exceeds this number of
-bytes, try to unpin some of it in response to demand for non-metadata.
-This value acts as a ceiling to the amount of dnode metadata, and defaults to
-.Sy 0 ,
-which indicates that a percentage based on
-.Sy zfs_arc_dnode_limit_percent
-of the ARC meta buffers may be used for dnodes.
-.Pp
-Also see
-.Sy zfs_arc_meta_prune
-which serves a similar purpose but is used
-when the amount of metadata in the ARC exceeds
-.Sy zfs_arc_meta_limit
-rather than in response to overall demand for non-metadata.
-.
-.It Sy zfs_arc_dnode_limit_percent Ns = Ns Sy 10 Ns % Pq ulong
-Percentage that can be consumed by dnodes of ARC meta buffers.
-.Pp
-See also
-.Sy zfs_arc_dnode_limit ,
-which serves a similar purpose but has a higher priority if nonzero.
-.
-.It Sy zfs_arc_dnode_reduce_percent Ns = Ns Sy 10 Ns % Pq ulong
-Percentage of ARC dnodes to try to scan in response to demand for non-metadata
-when the number of bytes consumed by dnodes exceeds
-.Sy zfs_arc_dnode_limit .
-.
-.It Sy zfs_arc_average_blocksize Ns = Ns Sy 8192 Ns B Po 8kB Pc Pq int
-The ARC's buffer hash table is sized based on the assumption of an average
-block size of this value.
-This works out to roughly 1MB of hash table per 1GB of physical memory
-with 8-byte pointers.
-For configurations with a known larger average block size,
-this value can be increased to reduce the memory footprint.
-.
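-.Pp
-As a purely illustrative check of the sizing rule for
-.Sy zfs_arc_average_blocksize
-above (the RAM figure is an assumption, not a recommendation),
-the expected hash table footprint can be computed directly:
-.Bd -literal -compact
-# One 8-byte pointer per zfs_arc_average_blocksize of physical memory:
-ram=$((16 * 1024 * 1024 * 1024))    # assume 16 GiB of RAM
-avg_bs=8192                         # zfs_arc_average_blocksize default
-echo "$((ram / avg_bs * 8)) bytes"  # 16777216, i.e. ~1MB per 1GB of RAM
-.Ed
-.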
-.It Sy zfs_arc_eviction_pct Ns = Ns Sy 200 Ns % Pq int
-When
-.Fn arc_is_overflowing ,
-.Fn arc_get_data_impl
-waits for this percent of the requested amount of data to be evicted.
-For example, by default, for every
-.Em 2kB
-that's evicted,
-.Em 1kB
-of it may be "reused" by a new allocation.
-Since this is above
-.Sy 100 Ns % ,
-it ensures that progress is made towards getting
-.Sy arc_size No under Sy arc_c .
-Since this is finite, it ensures that allocations can still happen,
-even during the potentially long time that
-.Sy arc_size No is more than Sy arc_c .
-.
-.It Sy zfs_arc_evict_batch_limit Ns = Ns Sy 10 Pq int
-Number of ARC headers to evict per sub-list before proceeding to another sub-list.
-This batch-style operation prevents entire sub-lists from being evicted at once
-but comes at a cost of additional unlocking and locking.
-.
-.It Sy zfs_arc_grow_retry Ns = Ns Sy 0 Ns s Pq int
-If set to a non-zero value, it will replace the
-.Sy arc_grow_retry
-value with this value.
-The
-.Sy arc_grow_retry
-.No value Pq default Sy 5 Ns s
-is the number of seconds the ARC will wait before
-trying to resume growth after a memory pressure event.
-.
-.It Sy zfs_arc_lotsfree_percent Ns = Ns Sy 10 Ns % Pq int
-Throttle I/O when free system memory drops below this percentage of total
-system memory.
-Setting this value to
-.Sy 0
-will disable the throttle.
-.
-.It Sy zfs_arc_max Ns = Ns Sy 0 Ns B Pq ulong
-Max size of ARC in bytes.
-If
-.Sy 0 ,
-then the max size of ARC is determined by the amount of system memory installed.
-Under Linux, half of system memory will be used as the limit.
-Under
-.Fx ,
-the larger of
-.Sy all_system_memory - 1GB No and Sy 5/8 * all_system_memory
-will be used as the limit.
-This value must be at least
-.Sy 67108864 Ns B Pq 64MB .
-.Pp
-This value can be changed dynamically, with some caveats.
-It cannot be set back to
-.Sy 0
-while running, and reducing it below the current ARC size will not cause
-the ARC to shrink without memory pressure to induce shrinking.
-.
-.It Sy zfs_arc_meta_adjust_restarts Ns = Ns Sy 4096 Pq ulong
-The number of restart passes to make while scanning the ARC attempting
-to free buffers in order to stay below the
-.Sy zfs_arc_meta_limit .
-This value should not need to be tuned but is available to facilitate
-performance analysis.
-.
-.It Sy zfs_arc_meta_limit Ns = Ns Sy 0 Ns B Pq ulong
-The maximum size in bytes that metadata buffers are allowed to
-consume in the ARC.
-When this limit is reached, metadata buffers will be reclaimed,
-even if the overall
-.Sy arc_c_max
-has not been reached.
-It defaults to
-.Sy 0 ,
-which indicates that a percentage based on
-.Sy zfs_arc_meta_limit_percent
-of the ARC may be used for metadata.
-.Pp
-This value may be changed dynamically, except that it must be set to an explicit value
-.Pq cannot be set back to Sy 0 .
-.
-.It Sy zfs_arc_meta_limit_percent Ns = Ns Sy 75 Ns % Pq ulong
-Percentage of ARC buffers that can be used for metadata.
-.Pp
-See also
-.Sy zfs_arc_meta_limit ,
-which serves a similar purpose but has a higher priority if nonzero.
-.
-.It Sy zfs_arc_meta_min Ns = Ns Sy 0 Ns B Pq ulong
-The minimum allowed size in bytes that metadata buffers may consume in
-the ARC.
-.
-.It Sy zfs_arc_meta_prune Ns = Ns Sy 10000 Pq int
-The number of dentries and inodes to be scanned looking for entries
-which can be dropped.
-This may be required when the ARC reaches the
-.Sy zfs_arc_meta_limit
-because dentries and inodes can pin buffers in the ARC.
-Increasing this value will cause the dentry and inode caches
-to be pruned more aggressively.
-Setting this value to
-.Sy 0
-will disable pruning the inode and dentry caches.
-.
-.It Sy zfs_arc_meta_strategy Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Define the strategy for ARC metadata buffer eviction (meta reclaim strategy):
-.Bl -tag -compact -offset 4n -width "0 (META_ONLY)"
-.It Sy 0 Pq META_ONLY
-evict only the ARC metadata buffers
-.It Sy 1 Pq BALANCED
-additional data buffers may be evicted if required
-to evict the required number of metadata buffers.
-.El
-.
-.It Sy zfs_arc_min Ns = Ns Sy 0 Ns B Pq ulong
-Min size of ARC in bytes.
-.No If set to Sy 0 , arc_c_min
-will default to consuming the larger of
-.Sy 32MB No or Sy all_system_memory/32 .
-.
-.It Sy zfs_arc_min_prefetch_ms Ns = Ns Sy 0 Ns ms Ns Po Ns ≡ Ns 1s Pc Pq int
-Minimum time prefetched blocks are locked in the ARC.
-.
-.It Sy zfs_arc_min_prescient_prefetch_ms Ns = Ns Sy 0 Ns ms Ns Po Ns ≡ Ns 6s Pc Pq int
-Minimum time "prescient prefetched" blocks are locked in the ARC.
-These blocks are meant to be prefetched fairly aggressively ahead of
-the code that may use them.
-.
-.It Sy zfs_max_missing_tvds Ns = Ns Sy 0 Pq int
-Number of missing top-level vdevs which will be allowed during
-pool import (only in read-only mode).
-.
-.It Sy zfs_max_nvlist_src_size Ns = Ns Sy 0 Pq ulong
-Maximum size in bytes allowed to be passed as
-.Sy zc_nvlist_src_size
-for ioctls on
-.Pa /dev/zfs .
-This prevents a user from causing the kernel to allocate
-an excessive amount of memory.
-When the limit is exceeded, the ioctl fails with
-.Sy EINVAL
-and a description of the error is sent to the
-.Pa zfs-dbgmsg
-log.
-This parameter should not need to be touched under normal circumstances.
-If
-.Sy 0 ,
-equivalent to a quarter of the user-wired memory limit under
-.Fx
-and to
-.Sy 134217728 Ns B Pq 128MB
-under Linux.
-.
-.It Sy zfs_multilist_num_sublists Ns = Ns Sy 0 Pq int
-To allow more fine-grained locking, each ARC state contains a series
-of lists for both data and metadata objects.
-Locking is performed at the level of these "sub-lists".
-This parameter controls the number of sub-lists per ARC state,
-and also applies to other uses of the multilist data structure.
-.Pp
-If
-.Sy 0 ,
-equivalent to the greater of the number of online CPUs and
-.Sy 4 .
-.
-.It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int
-The ARC size is considered to be overflowing if it exceeds the current
-ARC target size
-.Pq Sy arc_c
-by a threshold determined by this parameter.
-The threshold is calculated as a fraction of
-.Sy arc_c
-using the formula
-.Sy arc_c >> zfs_arc_overflow_shift .
-.Pp
-The default value of
-.Sy 8
-causes the ARC to be considered overflowing if it exceeds the target size by
-.Em 1/256th Pq Em 0.3%
-of the target size.
-.Pp
-When the ARC is overflowing, new buffer allocations are stalled until
-the reclaim thread catches up and the overflow condition no longer exists.
-.
-.It Sy zfs_arc_p_min_shift Ns = Ns Sy 0 Pq int
-If nonzero, this will update
-.Sy arc_p_min_shift Pq default Sy 4
-with the new value.
-.Sy arc_p_min_shift No is used as a shift of Sy arc_c
-when calculating the minimum
-.Sy arc_p No size.
-.
-.It Sy zfs_arc_p_dampener_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Disable
-.Sy arc_p
-adapt dampener, which reduces the maximum single adjustment to
-.Sy arc_p .
-.
-.It Sy zfs_arc_shrink_shift Ns = Ns Sy 0 Pq int
-If nonzero, this will update
-.Sy arc_shrink_shift Pq default Sy 7
-with the new value.
-.
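-.Pp
-For example, to pin the ARC between the
-.Sy zfs_arc_min No and Sy zfs_arc_max
-limits described above (the sizes here are illustrative, not
-recommendations; paths assume the Linux module interface):
-.Bd -literal -compact
-# Persistent, applied at module load (values in bytes):
-echo "options zfs zfs_arc_min=1073741824 zfs_arc_max=4294967296" > /etc/modprobe.d/zfs.conf
-# Runtime change; note it cannot be set back to 0 while loaded:
-echo 4294967296 > /sys/module/zfs/parameters/zfs_arc_max
-.Ed
-.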
-.It Sy zfs_arc_pc_percent Ns = Ns Sy 0 Ns % Po off Pc Pq uint -Percent of pagecache to reclaim ARC to. -.Pp -This tunable allows the ZFS ARC to play more nicely -with the kernel's LRU pagecache. -It can guarantee that the ARC size won't collapse under scanning -pressure on the pagecache, yet still allows the ARC to be reclaimed down to -.Sy zfs_arc_min -if necessary. -This value is specified as percent of pagecache size (as measured by -.Sy NR_FILE_PAGES ) , -where that percent may exceed -.Sy 100 . -This -only operates during memory pressure/reclaim. -. -.It Sy zfs_arc_shrinker_limit Ns = Ns Sy 10000 Pq int -This is a limit on how many pages the ARC shrinker makes available for -eviction in response to one page allocation attempt. -Note that in practice, the kernel's shrinker can ask us to evict -up to about four times this for one allocation attempt. -.Pp -The default limit of -.Sy 10000 Pq in practice, Em 160MB No per allocation attempt with 4kB pages -limits the amount of time spent attempting to reclaim ARC memory to -less than 100ms per allocation attempt, -even with a small average compressed block size of ~8kB. -.Pp -The parameter can be set to 0 (zero) to disable the limit, -and only applies on Linux. -. -.It Sy zfs_arc_sys_free Ns = Ns Sy 0 Ns B Pq ulong -The target number of bytes the ARC should leave as free memory on the system. -If zero, equivalent to the bigger of -.Sy 512kB No and Sy all_system_memory/64 . -. -.It Sy zfs_autoimport_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int -Disable pool import at module load by ignoring the cache file -.Pq Sy spa_config_path . -. -.It Sy zfs_checksum_events_per_second Ns = Ns Sy 20 Ns /s Pq uint -Rate limit checksum events to this many per second. -Note that this should not be set below the ZED thresholds -(currently 10 checksums over 10 seconds) -or else the daemon may not trigger any action. -. -.It Sy zfs_commit_timeout_pct Ns = Ns Sy 5 Ns % Pq int -This controls the amount of time that a ZIL block (lwb) will remain "open" -when it isn't "full", and it has a thread waiting for it to be committed to -stable storage. -The timeout is scaled based on a percentage of the last lwb -latency to avoid significantly impacting the latency of each individual -transaction record (itx). -. -.It Sy zfs_condense_indirect_commit_entry_delay_ms Ns = Ns Sy 0 Ns ms Pq int -Vdev indirection layer (used for device removal) sleeps for this many -milliseconds during mapping generation. -Intended for use with the test suite to throttle vdev removal speed. -. -.It Sy zfs_condense_indirect_obsolete_pct Ns = Ns Sy 25 Ns % Pq int -Minimum percent of obsolete bytes in vdev mapping required to attempt to condense -.Pq see Sy zfs_condense_indirect_vdevs_enable . -Intended for use with the test suite -to facilitate triggering condensing as needed. -. -.It Sy zfs_condense_indirect_vdevs_enable Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable condensing indirect vdev mappings. -When set, attempt to condense indirect vdev mappings -if the mapping uses more than -.Sy zfs_condense_min_mapping_bytes -bytes of memory and if the obsolete space map object uses more than -.Sy zfs_condense_max_obsolete_bytes -bytes on-disk. -The condensing process is an attempt to save memory by removing obsolete mappings. -. -.It Sy zfs_condense_max_obsolete_bytes Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong -Only attempt to condense indirect vdev mappings if the on-disk size -of the obsolete space map object is greater than this number of bytes -.Pq see Sy zfs_condense_indirect_vdevs_enable . -. 
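-.Pp
-The in-practice figure quoted for
-.Sy zfs_arc_shrinker_limit
-above can be reproduced directly (assuming 4kB pages):
-.Bd -literal -compact
-# limit * page size * ~4 (the kernel may ask for up to ~4x per attempt)
-echo $((10000 * 4096 * 4))  # 163840000 bytes, i.e. ~160MB
-.Ed
-.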
-.It Sy zfs_condense_min_mapping_bytes Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq ulong -Minimum size vdev mapping to attempt to condense -.Pq see Sy zfs_condense_indirect_vdevs_enable . -. -.It Sy zfs_dbgmsg_enable Ns = Ns Sy 1 Ns | Ns 0 Pq int -Internally ZFS keeps a small log to facilitate debugging. -The log is enabled by default, and can be disabled by unsetting this option. -The contents of the log can be accessed by reading -.Pa /proc/spl/kstat/zfs/dbgmsg . -Writing -.Sy 0 -to the file clears the log. -.Pp -This setting does not influence debug prints due to -.Sy zfs_flags . -. -.It Sy zfs_dbgmsg_maxsize Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int -Maximum size of the internal ZFS debug log. -. -.It Sy zfs_dbuf_state_index Ns = Ns Sy 0 Pq int -Historically used for controlling what reporting was available under -.Pa /proc/spl/kstat/zfs . -No effect. -. -.It Sy zfs_deadman_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -When a pool sync operation takes longer than -.Sy zfs_deadman_synctime_ms , -or when an individual I/O operation takes longer than -.Sy zfs_deadman_ziotime_ms , -then the operation is considered to be "hung". -If -.Sy zfs_deadman_enabled -is set, then the deadman behavior is invoked as described by -.Sy zfs_deadman_failmode . -By default, the deadman is enabled and set to -.Sy wait -which results in "hung" I/Os only being logged. -The deadman is automatically disabled when a pool gets suspended. -. -.It Sy zfs_deadman_failmode Ns = Ns Sy wait Pq charp -Controls the failure behavior when the deadman detects a "hung" I/O operation. -Valid values are: -.Bl -tag -compact -offset 4n -width "continue" -.It Sy wait -Wait for a "hung" operation to complete. -For each "hung" operation a "deadman" event will be posted -describing that operation. -.It Sy continue -Attempt to recover from a "hung" operation by re-dispatching it -to the I/O pipeline if possible. -.It Sy panic -Panic the system. -This can be used to facilitate automatic fail-over -to a properly configured fail-over partner. -.El -. -.It Sy zfs_deadman_checktime_ms Ns = Ns Sy 60000 Ns ms Po 1min Pc Pq int -Check time in milliseconds. -This defines the frequency at which we check for hung I/O requests -and potentially invoke the -.Sy zfs_deadman_failmode -behavior. -. -.It Sy zfs_deadman_synctime_ms Ns = Ns Sy 600000 Ns ms Po 10min Pc Pq ulong -Interval in milliseconds after which the deadman is triggered and also -the interval after which a pool sync operation is considered to be "hung". -Once this limit is exceeded the deadman will be invoked every -.Sy zfs_deadman_checktime_ms -milliseconds until the pool sync completes. -. -.It Sy zfs_deadman_ziotime_ms Ns = Ns Sy 300000 Ns ms Po 5min Pc Pq ulong -Interval in milliseconds after which the deadman is triggered and an -individual I/O operation is considered to be "hung". -As long as the operation remains "hung", -the deadman will be invoked every -.Sy zfs_deadman_checktime_ms -milliseconds until the operation completes. -. -.It Sy zfs_dedup_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int -Enable prefetching dedup-ed blocks which are going to be freed. -. -.It Sy zfs_delay_min_dirty_percent Ns = Ns Sy 60 Ns % Pq int -Start to delay each transaction once there is this amount of dirty data, -expressed as a percentage of -.Sy zfs_dirty_data_max . -This value should be at least -.Sy zfs_vdev_async_write_active_max_dirty_percent . -.No See Sx ZFS TRANSACTION DELAY . -. -.It Sy zfs_delay_scale Ns = Ns Sy 500000 Pq int -This controls how quickly the transaction delay approaches infinity. 
-Larger values cause longer delays for a given amount of dirty data.
-.Pp
-For the smoothest delay, this value should be about 1 billion divided
-by the maximum number of operations per second.
-This will smoothly handle between ten times and a tenth of this number.
-.No See Sx ZFS TRANSACTION DELAY .
-.Pp
-.Sy zfs_delay_scale * zfs_dirty_data_max Em must be smaller than Sy 2^64 .
-.
-.It Sy zfs_disable_ivset_guid_check Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disables requirement for IVset GUIDs to be present and match when doing a raw
-receive of encrypted datasets.
-Intended for users whose pools were created with
-OpenZFS pre-release versions and now have compatibility issues.
-.
-.It Sy zfs_key_max_salt_uses Ns = Ns Sy 400000000 Po 4*10^8 Pc Pq ulong
-Maximum number of uses of a single salt value before generating a new one for
-encrypted datasets.
-The default value is also the maximum.
-.
-.It Sy zfs_object_mutex_size Ns = Ns Sy 64 Pq uint
-Size of the znode hashtable used for holds.
-.Pp
-Due to the need to hold locks on objects that may not exist yet, kernel mutexes
-are not created per-object and instead a hashtable is used where collisions
-will result in objects waiting when there is not actually contention on the
-same object.
-.
-.It Sy zfs_slow_io_events_per_second Ns = Ns Sy 20 Ns /s Pq int
-Rate limit delay and deadman zevents (which report slow I/Os) to this many per
-second.
-.
-.It Sy zfs_unflushed_max_mem_amt Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong
-Upper-bound limit for unflushed metadata changes to be held by the
-log spacemap in memory, in bytes.
-.
-.It Sy zfs_unflushed_max_mem_ppm Ns = Ns Sy 1000 Ns ppm Po 0.1% Pc Pq ulong
-Part of overall system memory that ZFS allows to be used
-for unflushed metadata changes by the log spacemap, in millionths.
-.
-.It Sy zfs_unflushed_log_block_max Ns = Ns Sy 262144 Po 256k Pc Pq ulong
-Describes the maximum number of log spacemap blocks allowed for each pool.
-The default value means that the space in all the log spacemaps
-can add up to no more than
-.Sy 262144
-blocks (which means
-.Em 32GB
-of logical space before compression and ditto blocks,
-assuming that blocksize is
-.Em 128kB ) .
-.Pp
-This tunable is important because it involves a trade-off between import
-time after an unclean export and the frequency of flushing metaslabs.
-The higher this number is, the more log blocks we allow when the pool is
-active, which means that we flush metaslabs less often and thus decrease
-the number of I/Os for spacemap updates per TXG.
-At the same time though, that means that in the event of an unclean export,
-there will be more log spacemap blocks for us to read, inducing overhead
-in the import time of the pool.
-The lower the number, the more often flushing occurs, destroying log
-blocks quicker as they become obsolete faster, which leaves fewer blocks
-to be read during import time after a crash.
-.Pp
-Each log spacemap block existing during pool import leads to approximately
-one extra logical I/O issued.
-This is the reason why this tunable is exposed in terms of blocks rather
-than space used.
-.
-.It Sy zfs_unflushed_log_block_min Ns = Ns Sy 1000 Pq ulong
-If the number of metaslabs is small and our incoming rate is high,
-we could get into a situation in which we are flushing all our metaslabs every TXG.
-Thus we always allow at least this many log blocks.
-.
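-.Pp
-Following the rule of thumb given for
-.Sy zfs_delay_scale
-above, a pool expected to sustain about 2000 write operations per second
-(an assumed workload figure) would keep the default:
-.Bd -literal -compact
-# zfs_delay_scale ~= 10^9 / max_ops_per_second
-max_ops=2000
-echo $((1000000000 / max_ops))  # 500000, the default
-.Ed
-.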
-.It Sy zfs_unflushed_log_block_pct Ns = Ns Sy 400 Ns % Pq ulong
-Tunable used to determine the number of blocks that can be used for
-the spacemap log, expressed as a percentage of the total number of
-metaslabs in the pool.
-.
-.It Sy zfs_unlink_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq uint
-When enabled, files will not be asynchronously removed from the list of pending
-unlinks and the space they consume will be leaked.
-Once this option has been disabled and the dataset is remounted,
-the pending unlinks will be processed and the freed space returned to the pool.
-This option is used by the test suite.
-.
-.It Sy zfs_delete_blocks Ns = Ns Sy 20480 Pq ulong
-This is used to define a large file for the purposes of deletion.
-Files containing more than
-.Sy zfs_delete_blocks
-blocks will be deleted asynchronously, while smaller files are deleted synchronously.
-Decreasing this value will reduce the time spent in an
-.Xr unlink 2
-system call, at the expense of a longer delay before the freed space is available.
-.
-.It Sy zfs_dirty_data_max Ns = Pq int
-Determines the dirty space limit in bytes.
-Once this limit is exceeded, new writes are halted until space frees up.
-This parameter takes precedence over
-.Sy zfs_dirty_data_max_percent .
-.No See Sx ZFS TRANSACTION DELAY .
-.Pp
-Defaults to
-.Sy physical_ram/10 ,
-capped at
-.Sy zfs_dirty_data_max_max .
-.
-.It Sy zfs_dirty_data_max_max Ns = Pq int
-Maximum allowable value of
-.Sy zfs_dirty_data_max ,
-expressed in bytes.
-This limit is only enforced at module load time, and will be ignored if
-.Sy zfs_dirty_data_max
-is later changed.
-This parameter takes precedence over
-.Sy zfs_dirty_data_max_max_percent .
-.No See Sx ZFS TRANSACTION DELAY .
-.Pp
-Defaults to
-.Sy physical_ram/4 .
-.
-.It Sy zfs_dirty_data_max_max_percent Ns = Ns Sy 25 Ns % Pq int
-Maximum allowable value of
-.Sy zfs_dirty_data_max ,
-expressed as a percentage of physical RAM.
-This limit is only enforced at module load time, and will be ignored if
-.Sy zfs_dirty_data_max
-is later changed.
-The parameter
-.Sy zfs_dirty_data_max_max
-takes precedence over this one.
-.No See Sx ZFS TRANSACTION DELAY .
-.
-.It Sy zfs_dirty_data_max_percent Ns = Ns Sy 10 Ns % Pq int
-Determines the dirty space limit, expressed as a percentage of all memory.
-Once this limit is exceeded, new writes are halted until space frees up.
-The parameter
-.Sy zfs_dirty_data_max
-takes precedence over this one.
-.No See Sx ZFS TRANSACTION DELAY .
-.Pp
-Subject to
-.Sy zfs_dirty_data_max_max .
-.
-.It Sy zfs_dirty_data_sync_percent Ns = Ns Sy 20 Ns % Pq int
-Start syncing out a transaction group if there's at least this much dirty data
-.Pq as a percentage of Sy zfs_dirty_data_max .
-This should be less than
-.Sy zfs_vdev_async_write_active_min_dirty_percent .
-.
-.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
-Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
-preallocated for a file in order to guarantee that later writes will not
-run out of space.
-Instead,
-.Xr fallocate 2
-space preallocation only checks that sufficient space is currently available
-in the pool or the user's project quota allocation,
-and then creates a sparse file of the requested size.
-The requested space is multiplied by
-.Sy zfs_fallocate_reserve_percent
-to allow additional space for indirect blocks and other internal metadata.
-Setting this to
-.Sy 0
-disables support for
-.Xr fallocate 2
-and causes it to return
-.Sy EOPNOTSUPP .
-.
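-.Pp
-As a worked example of the
-.Sy zfs_fallocate_reserve_percent
-calculation above (the requested size is arbitrary):
-.Bd -literal -compact
-size=$((1024 * 1024 * 1024))  # fallocate 1 GiB
-echo $((size * 110 / 100))    # ~1.1 GiB checked against the quota
-.Ed
-.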
-.It Sy zfs_fletcher_4_impl Ns = Ns Sy fastest Pq string
-Select a fletcher 4 implementation.
-.Pp
-Supported selectors are:
-.Sy fastest , scalar , sse2 , ssse3 , avx2 , avx512f , avx512bw ,
-.No and Sy aarch64_neon .
-All except
-.Sy fastest No and Sy scalar
-require instruction set extensions to be available,
-and will only appear if ZFS detects that they are present at runtime.
-If multiple implementations of fletcher 4 are available, the
-.Sy fastest
-will be chosen using a micro benchmark.
-Selecting
-.Sy scalar
-results in the original CPU-based calculation being used.
-Selecting any option other than
-.Sy fastest No or Sy scalar
-results in vector instructions
-from the respective CPU instruction set being used.
-.
-.It Sy zfs_free_bpobj_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Enable/disable the processing of the free_bpobj object.
-.
-.It Sy zfs_async_block_max_blocks Ns = Ns Sy ULONG_MAX Po unlimited Pc Pq ulong
-Maximum number of blocks freed in a single TXG.
-.
-.It Sy zfs_max_async_dedup_frees Ns = Ns Sy 100000 Po 10^5 Pc Pq ulong
-Maximum number of dedup blocks freed in a single TXG.
-.
-.It Sy zfs_override_estimate_recordsize Ns = Ns Sy 0 Pq ulong
-If nonzero, override record size calculation for
-.Nm zfs Cm send
-estimates.
-.
-.It Sy zfs_vdev_async_read_max_active Ns = Ns Sy 3 Pq int
-Maximum asynchronous read I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_async_read_min_active Ns = Ns Sy 1 Pq int
-Minimum asynchronous read I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_async_write_active_max_dirty_percent Ns = Ns Sy 60 Ns % Pq int
-When the pool has more than this much dirty data, use
-.Sy zfs_vdev_async_write_max_active
-to limit active async writes.
-If the dirty data is between the minimum and maximum,
-the active I/O limit is linearly interpolated.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_async_write_active_min_dirty_percent Ns = Ns Sy 30 Ns % Pq int
-When the pool has less than this much dirty data, use
-.Sy zfs_vdev_async_write_min_active
-to limit active async writes.
-If the dirty data is between the minimum and maximum,
-the active I/O limit is linearly interpolated.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_async_write_max_active Ns = Ns Sy 30 Pq int
-Maximum asynchronous write I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_async_write_min_active Ns = Ns Sy 2 Pq int
-Minimum asynchronous write I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.Pp
-Lower values are associated with better latency on rotational media but poorer
-resilver performance.
-The default value of
-.Sy 2
-was chosen as a compromise.
-A value of
-.Sy 3
-has been shown to improve resilver performance further at a cost of
-further increasing latency.
-.
-.It Sy zfs_vdev_initializing_max_active Ns = Ns Sy 1 Pq int
-Maximum initializing I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_initializing_min_active Ns = Ns Sy 1 Pq int
-Minimum initializing I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_max_active Ns = Ns Sy 1000 Pq int
-The maximum number of I/O operations active to each device.
-Ideally, this will be at least the sum of each queue's
-.Sy max_active .
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_rebuild_max_active Ns = Ns Sy 3 Pq int
-Maximum sequential resilver I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_rebuild_min_active Ns = Ns Sy 1 Pq int
-Minimum sequential resilver I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_removal_max_active Ns = Ns Sy 2 Pq int
-Maximum removal I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_removal_min_active Ns = Ns Sy 1 Pq int
-Minimum removal I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_scrub_max_active Ns = Ns Sy 2 Pq int
-Maximum scrub I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_scrub_min_active Ns = Ns Sy 1 Pq int
-Minimum scrub I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_sync_read_max_active Ns = Ns Sy 10 Pq int
-Maximum synchronous read I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_sync_read_min_active Ns = Ns Sy 10 Pq int
-Minimum synchronous read I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_sync_write_max_active Ns = Ns Sy 10 Pq int
-Maximum synchronous write I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_sync_write_min_active Ns = Ns Sy 10 Pq int
-Minimum synchronous write I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_trim_max_active Ns = Ns Sy 2 Pq int
-Maximum trim/discard I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_trim_min_active Ns = Ns Sy 1 Pq int
-Minimum trim/discard I/O operations active to each device.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_nia_delay Ns = Ns Sy 5 Pq int
-For non-interactive I/O (scrub, resilver, removal, initialize and rebuild),
-the number of concurrently-active I/O operations is limited to
-.Sy zfs_*_min_active ,
-unless the vdev is "idle".
-When there are no interactive I/O operations active (synchronous or otherwise),
-and
-.Sy zfs_vdev_nia_delay
-operations have completed since the last interactive operation,
-then the vdev is considered to be "idle",
-and the number of concurrently-active non-interactive operations is increased to
-.Sy zfs_*_max_active .
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_nia_credit Ns = Ns Sy 5 Pq int
-Some HDDs tend to prioritize sequential I/O so strongly, that concurrent
-random I/O latency reaches several seconds.
-On some HDDs this happens even if sequential I/O operations
-are submitted one at a time, and so setting
-.Sy zfs_*_max_active Ns = Ns Sy 1
-does not help.
-To prevent non-interactive I/O, like scrub,
-from monopolizing the device, no more than
-.Sy zfs_vdev_nia_credit No operations can be sent
-while there are outstanding incomplete interactive operations.
-This enforced wait ensures the HDD services the interactive I/O
-within a reasonable amount of time.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_queue_depth_pct Ns = Ns Sy 1000 Ns % Pq int
-Maximum number of queued allocations per top-level vdev expressed as
-a percentage of
-.Sy zfs_vdev_async_write_max_active ,
-which allows the system to detect devices that are more capable
-of handling allocations and to allocate more blocks to those devices.
-This allows for dynamic allocation distribution when devices are imbalanced,
-as fuller devices will tend to be slower than empty devices.
-.Pp
-Also see
-.Sy zio_dva_throttle_enabled .
-.
-.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
-Time before expiring
-.Pa .zfs/snapshot .
-.
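-.Pp
-As a concrete illustration of the trade-off described for
-.Sy zfs_vdev_async_write_min_active
-above (runtime, Linux; whether this is worthwhile depends on the workload):
-.Bd -literal -compact
-# Trade a little write latency for better resilver throughput:
-echo 3 > /sys/module/zfs/parameters/zfs_vdev_async_write_min_active
-.Ed
-.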
-.It Sy zfs_admin_snapshot Ns = Ns Sy 0 Ns | Ns 1 Pq int -Allow the creation, removal, or renaming of entries in the -.Sy .zfs/snapshot -directory to cause the creation, destruction, or renaming of snapshots. -When enabled, this functionality works both locally and over NFS exports -which have the -.Em no_root_squash -option set. -. -.It Sy zfs_flags Ns = Ns Sy 0 Pq int -Set additional debugging flags. -The following flags may be bitwise-ored together: -.TS -box; -lbz r l l . - Value Symbolic Name Description -_ - 1 ZFS_DEBUG_DPRINTF Enable dprintf entries in the debug log. -* 2 ZFS_DEBUG_DBUF_VERIFY Enable extra dbuf verifications. -* 4 ZFS_DEBUG_DNODE_VERIFY Enable extra dnode verifications. - 8 ZFS_DEBUG_SNAPNAMES Enable snapshot name verification. - 16 ZFS_DEBUG_MODIFY Check for illegally modified ARC buffers. - 64 ZFS_DEBUG_ZIO_FREE Enable verification of block frees. - 128 ZFS_DEBUG_HISTOGRAM_VERIFY Enable extra spacemap histogram verifications. - 256 ZFS_DEBUG_METASLAB_VERIFY Verify space accounting on disk matches in-memory \fBrange_trees\fP. - 512 ZFS_DEBUG_SET_ERROR Enable \fBSET_ERROR\fP and dprintf entries in the debug log. - 1024 ZFS_DEBUG_INDIRECT_REMAP Verify split blocks created by device removal. - 2048 ZFS_DEBUG_TRIM Verify TRIM ranges are always within the allocatable range tree. - 4096 ZFS_DEBUG_LOG_SPACEMAP Verify that the log summary is consistent with the spacemap log - and enable \fBzfs_dbgmsgs\fP for metaslab loading and flushing. -.TE -.Sy \& * No Requires debug build. -. -.It Sy zfs_free_leak_on_eio Ns = Ns Sy 0 Ns | Ns 1 Pq int -If destroy encounters an -.Sy EIO -while reading metadata (e.g. indirect blocks), -space referenced by the missing metadata can not be freed. -Normally this causes the background destroy to become "stalled", -as it is unable to make forward progress. -While in this stalled state, all remaining space to free -from the error-encountering filesystem is "temporarily leaked". -Set this flag to cause it to ignore the -.Sy EIO , -permanently leak the space from indirect blocks that can not be read, -and continue to free everything else that it can. -.Pp -The default "stalling" behavior is useful if the storage partially -fails (i.e. some but not all I/O operations fail), and then later recovers. -In this case, we will be able to continue pool operations while it is -partially failed, and when it recovers, we can continue to free the -space, with no leaks. -Note, however, that this case is actually fairly rare. -.Pp -Typically pools either -.Bl -enum -compact -offset 4n -width "1." -.It -fail completely (but perhaps temporarily, -e.g. due to a top-level vdev going offline), or -.It -have localized, permanent errors (e.g. disk returns the wrong data -due to bit flip or firmware bug). -.El -In the former case, this setting does not matter because the -pool will be suspended and the sync thread will not be able to make -forward progress regardless. -In the latter, because the error is permanent, the best we can do -is leak the minimum amount of space, -which is what setting this flag will do. -It is therefore reasonable for this flag to normally be set, -but we chose the more conservative approach of not setting it, -so that there is no possibility of -leaking space in the "partial temporary" failure case. -. -.It Sy zfs_free_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq int -During a -.Nm zfs Cm destroy -operation using the -.Sy async_destroy -feature, -a minimum of this much time will be spent working on freeing blocks per TXG. -. 
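-.Pp
-The
-.Sy zfs_flags
-values above combine by bitwise OR; for example, to enable both
-ZFS_DEBUG_DPRINTF (1) and ZFS_DEBUG_SET_ERROR (512) at runtime on Linux
-(illustrative; some flags require a debug build):
-.Bd -literal -compact
-echo $((1 | 512)) > /sys/module/zfs/parameters/zfs_flags  # 513
-.Ed
-.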
-.It Sy zfs_obsolete_min_time_ms Ns = Ns Sy 500 Ns ms Pq int
-Similar to
-.Sy zfs_free_min_time_ms ,
-but for cleanup of old indirection records for removed vdevs.
-.
-.It Sy zfs_immediate_write_sz Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq long
-Largest data block to write to the ZIL.
-Larger blocks will be treated as if the dataset being written to had the
-.Sy logbias Ns = Ns Sy throughput
-property set.
-.
-.It Sy zfs_initialize_value Ns = Ns Sy 16045690984833335022 Po 0xDEADBEEFDEADBEEE Pc Pq ulong
-Pattern written to vdev free space by
-.Xr zpool-initialize 8 .
-.
-.It Sy zfs_initialize_chunk_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
-Size of writes used by
-.Xr zpool-initialize 8 .
-This option is used by the test suite.
-.
-.It Sy zfs_livelist_max_entries Ns = Ns Sy 500000 Po 5*10^5 Pc Pq ulong
-The threshold size (in block pointers) at which we create a new sub-livelist.
-Larger sublists are more costly from a memory perspective but the fewer
-sublists there are, the lower the cost of insertion.
-.
-.It Sy zfs_livelist_min_percent_shared Ns = Ns Sy 75 Ns % Pq int
-If the amount of shared space between a snapshot and its clone drops below
-this threshold, the clone turns off the livelist and reverts to the old
-deletion method.
-This is in place because livelists no longer give us a benefit
-once a clone has been overwritten enough.
-.
-.It Sy zfs_livelist_condense_new_alloc Ns = Ns Sy 0 Pq int
-Incremented each time an extra ALLOC blkptr is added to a livelist entry while
-it is being condensed.
-This option is used by the test suite to track race conditions.
-.
-.It Sy zfs_livelist_condense_sync_cancel Ns = Ns Sy 0 Pq int
-Incremented each time livelist condensing is canceled while in
-.Fn spa_livelist_condense_sync .
-This option is used by the test suite to track race conditions.
-.
-.It Sy zfs_livelist_condense_sync_pause Ns = Ns Sy 0 Ns | Ns 1 Pq int
-When set, the livelist condense process pauses indefinitely before
-executing the synctask,
-.Fn spa_livelist_condense_sync .
-This option is used by the test suite to trigger race conditions.
-.
-.It Sy zfs_livelist_condense_zthr_cancel Ns = Ns Sy 0 Pq int
-Incremented each time livelist condensing is canceled while in
-.Fn spa_livelist_condense_cb .
-This option is used by the test suite to track race conditions.
-.
-.It Sy zfs_livelist_condense_zthr_pause Ns = Ns Sy 0 Ns | Ns 1 Pq int
-When set, the livelist condense process pauses indefinitely before
-executing the open context condensing work in
-.Fn spa_livelist_condense_cb .
-This option is used by the test suite to trigger race conditions.
-.
-.It Sy zfs_lua_max_instrlimit Ns = Ns Sy 100000000 Po 10^8 Pc Pq ulong
-The maximum execution time limit that can be set for a ZFS channel program,
-specified as a number of Lua instructions.
-.
-.It Sy zfs_lua_max_memlimit Ns = Ns Sy 104857600 Po 100MB Pc Pq ulong
-The maximum memory limit that can be set for a ZFS channel program, specified
-in bytes.
-.
-.It Sy zfs_max_dataset_nesting Ns = Ns Sy 50 Pq int
-The maximum depth of nested datasets.
-This value can be tuned temporarily to
-fix existing datasets that exceed the predefined limit.
-.
-.It Sy zfs_max_log_walking Ns = Ns Sy 5 Pq ulong
-The number of past TXGs that the flushing algorithm of the log spacemap
-feature uses to estimate incoming log blocks.
-.
-.It Sy zfs_max_logsm_summary_length Ns = Ns Sy 10 Pq ulong
-Maximum number of rows allowed in the summary of the spacemap log.
-.
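-.Pp
-The channel program ceilings above bound what
-.Xr zfs-program 8
-may request; a sketch (the pool name and script path are placeholders):
-.Bd -literal -compact
-# Request 10^7 Lua instructions and 10MB of memory, both below the
-# zfs_lua_max_instrlimit and zfs_lua_max_memlimit ceilings:
-zfs program -t 10000000 -m 10485760 tank /tmp/prog.lua
-.Ed
-.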
-.It Sy zfs_max_recordsize Ns = Ns Sy 1048576 Po 1MB Pc Pq int
-We currently support block sizes from
-.Em 512B No to Em 16MB .
-The benefits of larger blocks, and thus larger I/O,
-need to be weighed against the cost of COWing a giant block to modify one byte.
-Additionally, very large blocks can have an impact on I/O latency,
-and also potentially on the memory allocator.
-Therefore, we do not allow the recordsize to be set larger than this tunable.
-Larger blocks can be created by changing it,
-and pools with larger blocks can always be imported and used,
-regardless of this setting.
-.
-.It Sy zfs_allow_redacted_dataset_mount Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Allow datasets received with redacted send/receive to be mounted.
-Normally disabled because these datasets may be missing key data.
-.
-.It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq ulong
-Minimum number of metaslabs to flush per dirty TXG.
-.
-.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq int
-Allow metaslabs to keep their active state as long as their fragmentation
-percentage is no more than this value.
-An active metaslab that exceeds this threshold
-will no longer keep its active status, allowing better metaslabs to be selected.
-.
-.It Sy zfs_mg_fragmentation_threshold Ns = Ns Sy 95 Ns % Pq int
-Metaslab groups are considered eligible for allocations if their
-fragmentation metric (measured as a percentage) is less than or equal to
-this value.
-If a metaslab group exceeds this threshold then it will be
-skipped unless all metaslab groups within the metaslab class have also
-crossed this threshold.
-.
-.It Sy zfs_mg_noalloc_threshold Ns = Ns Sy 0 Ns % Pq int
-Defines a threshold at which metaslab groups should be eligible for allocations.
-The value is expressed as a percentage of free space
-beyond which a metaslab group is always eligible for allocations.
-If a metaslab group's free space is less than or equal to the
-threshold, the allocator will avoid allocating to that group
-unless all groups in the pool have reached the threshold.
-Once all groups have reached the threshold, all groups are allowed to accept
-allocations.
-The default value of
-.Sy 0
-disables the feature and causes all metaslab groups to be eligible for allocations.
-.Pp
-This parameter allows one to deal with pools having heavily imbalanced
-vdevs such as would be the case when a new vdev has been added.
-Setting the threshold to a non-zero percentage will stop allocations
-from being made to vdevs that aren't filled to the specified percentage
-and allow lesser filled vdevs to acquire more allocations than they
-otherwise would under the old
-.Sy zfs_mg_alloc_failures
-facility.
-.
-.It Sy zfs_ddt_data_is_special Ns = Ns Sy 1 Ns | Ns 0 Pq int
-If enabled, ZFS will place DDT data into the special allocation class.
-.
-.It Sy zfs_user_indirect_is_special Ns = Ns Sy 1 Ns | Ns 0 Pq int
-If enabled, ZFS will place user data indirect blocks
-into the special allocation class.
-.
-.It Sy zfs_multihost_history Ns = Ns Sy 0 Pq int
-Historical statistics for this many latest multihost updates will be available in
-.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /multihost .
-.
-.It Sy zfs_multihost_interval Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq ulong
-Used to control the frequency of multihost writes which are performed when the
-.Sy multihost
-pool property is on.
-This is one of the factors used to determine the
-length of the activity check during import.
-.Pp
-The multihost write period is
-.Sy zfs_multihost_interval / leaf-vdevs .
-On average a multihost write will be issued for each leaf vdev
-every
-.Sy zfs_multihost_interval
-milliseconds.
-In practice, the observed period can vary with the I/O load
-and this observed value is the delay which is stored in the uberblock.
-.
-.It Sy zfs_multihost_import_intervals Ns = Ns Sy 20 Pq uint
-Used to control the duration of the activity test on import.
-Smaller values of
-.Sy zfs_multihost_import_intervals
-will reduce the import time but increase
-the risk of failing to detect an active pool.
-The total activity check time is never allowed to drop below one second.
-.Pp
-On import the activity check waits a minimum amount of time determined by
-.Sy zfs_multihost_interval * zfs_multihost_import_intervals ,
-or the same product computed on the host which last had the pool imported,
-whichever is greater.
-The activity check time may be further extended if the value of MMP
-delay found in the best uberblock indicates actual multihost updates happened
-at longer intervals than
-.Sy zfs_multihost_interval .
-A minimum of
-.Em 100ms
-is enforced.
-.Pp
-.Sy 0 No is equivalent to Sy 1 .
-.
-.It Sy zfs_multihost_fail_intervals Ns = Ns Sy 10 Pq uint
-Controls the behavior of the pool when multihost write failures or delays are
-detected.
-.Pp
-When
-.Sy 0 ,
-multihost write failures or delays are ignored.
-The failures will still be reported to the ZED, which, depending on
-its configuration, may take action such as suspending the pool or offlining a
-device.
-.Pp
-Otherwise, the pool will be suspended if
-.Sy zfs_multihost_fail_intervals * zfs_multihost_interval
-milliseconds pass without a successful MMP write.
-This guarantees the activity test will see MMP writes if the pool is imported.
-.Sy 1 No is equivalent to Sy 2 ;
-this is necessary to prevent the pool from being suspended
-due to normal, small I/O latency variations.
-.
-.It Sy zfs_no_scrub_io Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Set to disable scrub I/O.
-This results in scrubs not actually scrubbing data and
-simply doing a metadata crawl of the pool instead.
-.
-.It Sy zfs_no_scrub_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Set to disable block prefetching for scrubs.
-.
-.It Sy zfs_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable cache flush operations on disks when writing.
-Setting this will cause pool corruption on power loss
-if a volatile out-of-order write cache is enabled.
-.
-.It Sy zfs_nopwrite_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Allow no-operation writes.
-The occurrence of nopwrites will further depend on other pool properties
-.Pq i.a. the checksumming and compression algorithms .
-.
-.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Enable forcing TXG sync to find holes.
-When enabled, forces ZFS to act like prior versions when
-.Sy SEEK_HOLE No or Sy SEEK_DATA
-flags are used, which, when a dnode is dirty,
-causes TXGs to be synced so that this data can be found.
-.
-.It Sy zfs_pd_bytes_max Ns = Ns Sy 52428800 Ns B Po 50MB Pc Pq int
-The number of bytes which should be prefetched during a pool traversal, like
-.Nm zfs Cm send
-or other data crawling operations.
-.
-.It Sy zfs_traverse_indirect_prefetch_limit Ns = Ns Sy 32 Pq int
-The number of blocks pointed to by an indirect (non-L0) block which should be
-prefetched during a pool traversal, like
-.Nm zfs Cm send
-or other data crawling operations.
-.
-.It Sy zfs_per_txg_dirty_frees_percent Ns = Ns Sy 5 Ns % Pq ulong
-Control percentage of dirtied indirect blocks from frees allowed into one TXG.
-After this threshold is crossed, additional frees will wait until the next TXG.
-.Sy 0 No disables this throttle.
-.
-.It Sy zfs_prefetch_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable predictive prefetch.
-Note that it leaves "prescient" prefetch (e.g.\& for
-.Nm zfs Cm send )
-intact.
-Unlike predictive prefetch, prescient prefetch never issues I/O
-that ends up not being needed, so it can't hurt performance.
-.
-.It Sy zfs_qat_checksum_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable QAT hardware acceleration for SHA256 checksums.
-May be unset after the ZFS modules have been loaded to initialize the QAT
-hardware as long as support is compiled in and the QAT driver is present.
-.
-.It Sy zfs_qat_compress_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable QAT hardware acceleration for gzip compression.
-May be unset after the ZFS modules have been loaded to initialize the QAT
-hardware as long as support is compiled in and the QAT driver is present.
-.
-.It Sy zfs_qat_encrypt_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable QAT hardware acceleration for AES-GCM encryption.
-May be unset after the ZFS modules have been loaded to initialize the QAT
-hardware as long as support is compiled in and the QAT driver is present.
-.
-.It Sy zfs_vnops_read_chunk_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq long
-Bytes to read per chunk.
-.
-.It Sy zfs_read_history Ns = Ns Sy 0 Pq int
-Historical statistics for this many latest reads will be available in
-.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /reads .
-.
-.It Sy zfs_read_history_hits Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Include cache hits in read history.
-.
-.It Sy zfs_rebuild_max_segment Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
-Maximum read segment size to issue when sequentially resilvering a
-top-level vdev.
-.
-.It Sy zfs_rebuild_scrub_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Automatically start a pool scrub when the last active sequential resilver
-completes in order to verify the checksums of all blocks which have been
-resilvered.
-This is enabled by default and strongly recommended.
-.
-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong
-Maximum amount of I/O that can be concurrently issued for a sequential
-resilver per leaf device, given in bytes.
-.
-.It Sy zfs_reconstruct_indirect_combinations_max Ns = Ns Sy 4096 Pq int
-If an indirect split block contains more than this many possible unique
-combinations when being reconstructed, consider it too computationally
-expensive to check them all.
-Instead, try at most this many randomly selected
-combinations each time the block is accessed.
-This allows all segment copies to participate fairly
-in the reconstruction when all combinations
-cannot be checked and prevents repeated use of one bad copy.
-.
-.It Sy zfs_recover Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Set to attempt to recover from fatal errors.
-This should only be used as a last resort,
-as it typically results in leaked space, or worse.
-.
-.It Sy zfs_removal_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Ignore hard IO errors during device removal.
-When set, if a device encounters a hard IO error during the removal process,
-the removal will not be cancelled.
-This can result in a normally recoverable block becoming permanently damaged
-and is hence not recommended.
-This should only be used as a last resort when the
-pool cannot be returned to a healthy state prior to removing the device.
-.
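-.Pp
-Putting the
-.Sy zfs_multihost_*
-parameters above together with their defaults,
-the resulting timings can be sketched as follows (defaults assumed):
-.Bd -literal -compact
-interval=1000  # zfs_multihost_interval in ms
-echo "activity check: >= $((interval * 20 / 1000))s"   # * import_intervals
-echo "suspend after: $((interval * 10 / 1000))s quiet"  # * fail_intervals
-.Ed
-.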
-.It Sy zfs_removal_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq int
-This is used by the test suite so that it can ensure that certain actions
-happen while in the middle of a removal.
-.
-.It Sy zfs_remove_max_segment Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
-The largest contiguous segment that we will attempt to allocate when removing
-a device.
-If there is a performance problem with attempting to allocate large blocks,
-consider decreasing this.
-The default value is also the maximum.
-.
-.It Sy zfs_resilver_disable_defer Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Ignore the
-.Sy resilver_defer
-feature, causing an operation that would start a resilver to
-immediately restart the one in progress.
-.
-.It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3s Pc Pq int
-Resilvers are processed by the sync thread.
-While resilvering, it will spend at least this much time
-working on a resilver between TXG flushes.
-.
-.It Sy zfs_scan_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
-If set, remove the DTL (dirty time list) upon completion of a pool scan (scrub),
-even if there were unrepairable errors.
-Intended to be used during pool repair or recovery to
-stop resilvering when the pool is next imported.
-.
-.It Sy zfs_scrub_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq int
-Scrubs are processed by the sync thread.
-While scrubbing, it will spend at least this much time
-working on a scrub between TXG flushes.
-.
-.It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2h Pc Pq int
-To preserve progress across reboots, the sequential scan algorithm periodically
-needs to stop metadata scanning and issue all the verification I/O to disk.
-The frequency of this flushing is determined by this tunable.
-.
-.It Sy zfs_scan_fill_weight Ns = Ns Sy 3 Pq int
-This tunable affects how scrub and resilver I/O segments are ordered.
-A higher number indicates that we care more about how filled in a segment is,
-while a lower number indicates we care more about the size of the extent without
-considering the gaps within a segment.
-This value is only tunable upon module insertion.
-Changing the value afterwards will have no effect on scrub or resilver performance.
-.
-.It Sy zfs_scan_issue_strategy Ns = Ns Sy 0 Pq int
-Determines the order that data will be verified while scrubbing or resilvering:
-.Bl -tag -compact -offset 4n -width "a"
-.It Sy 1
-Data will be verified as sequentially as possible, given the
-amount of memory reserved for scrubbing
-.Pq see Sy zfs_scan_mem_lim_fact .
-This may improve scrub performance if the pool's data is very fragmented.
-.It Sy 2
-The largest mostly-contiguous chunk of found data will be verified first.
-By deferring scrubbing of small segments, we may later find adjacent data
-to coalesce and increase the segment size.
-.It Sy 0
-.No Use strategy Sy 1 No during normal verification
-.No and strategy Sy 2 No while taking a checkpoint.
-.El
-.
-.It Sy zfs_scan_legacy Ns = Ns Sy 0 Ns | Ns 1 Pq int
-If unset, indicates that scrubs and resilvers will gather metadata in
-memory before issuing sequential I/O.
-Otherwise indicates that the legacy algorithm will be used,
-where I/O is initiated as soon as it is discovered.
-Unsetting will not affect scrubs or resilvers that are already in progress.
-.
-.It Sy zfs_scan_max_ext_gap Ns = Ns Sy 2097152 Ns B Po 2MB Pc Pq int
-Sets the largest gap in bytes between scrub/resilver I/O operations
-that will still be considered sequential for sorting purposes.
-Changing this value will not
-affect scrubs or resilvers that are already in progress.
-.
-.It Sy zfs_scan_mem_lim_fact Ns = Ns Sy 20 Ns ^-1 Pq int
-Maximum fraction of RAM used for I/O sorting by sequential scan algorithm.
-This tunable determines the hard limit for I/O sorting memory usage.
-When the hard limit is reached, we stop scanning metadata and start issuing
-data verification I/O.
-This is done until we get below the soft limit.
-.
-.It Sy zfs_scan_mem_lim_soft_fact Ns = Ns Sy 20 Ns ^-1 Pq int
-The fraction of the hard limit used to determine the soft limit for I/O sorting
-by the sequential scan algorithm.
-When we cross this limit from below, no action is taken.
-When we cross this limit from above, it is because we are issuing verification I/O.
-In this case (unless the metadata scan is done) we stop issuing verification I/O
-and start scanning metadata again until we get to the hard limit.
-.
-.It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Enforce tight memory limits on pool scans when a sequential scan is in progress.
-When disabled, the memory limit may be exceeded by fast disks.
-.
-.It Sy zfs_scan_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Freezes a scrub/resilver in progress without actually pausing it.
-Intended for testing/debugging.
-.
-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int
-Maximum amount of data that can be concurrently issued at once for scrubs and
-resilvers per leaf device, given in bytes.
-.
-.It Sy zfs_send_corrupt_data Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Allow sending of corrupt data (ignore read/checksum errors when sending).
-.
-.It Sy zfs_send_unmodified_spill_blocks Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Include unmodified spill blocks in the send stream.
-Under certain circumstances, previous versions of ZFS could incorrectly
-remove the spill block from an existing object.
-Including unmodified copies of the spill blocks creates a backwards-compatible
-stream which will recreate a spill block if it was incorrectly removed.
-.
-.It Sy zfs_send_no_prefetch_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
-The fill fraction of the
-.Nm zfs Cm send
-internal queues.
-The fill fraction controls the timing with which internal threads are woken up.
-.
-.It Sy zfs_send_no_prefetch_queue_length Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
-The maximum number of bytes allowed in
-.Nm zfs Cm send Ns 's
-internal queues.
-.
-.It Sy zfs_send_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
-The fill fraction of the
-.Nm zfs Cm send
-prefetch queue.
-The fill fraction controls the timing with which internal threads are woken up.
-.
-.It Sy zfs_send_queue_length Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
-The maximum number of bytes that will be prefetched by
-.Nm zfs Cm send .
-This value must be at least twice the maximum block size in use.
-.
-.It Sy zfs_recv_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
-The fill fraction of the
-.Nm zfs Cm receive
-queue.
-The fill fraction controls the timing with which internal threads are woken up.
-.
-.It Sy zfs_recv_queue_length Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
-The maximum number of bytes allowed in the
-.Nm zfs Cm receive
-queue.
-This value must be at least twice the maximum block size in use.
-.
-.It Sy zfs_recv_write_batch_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
-The maximum amount of data, in bytes, that
-.Nm zfs Cm receive
-will write in one DMU transaction.
-This is the uncompressed size, even when receiving a compressed send stream.
-This setting will not reduce the write size below a single block. -Capped at a maximum of -.Sy 32MB . -. -.It Sy zfs_override_estimate_recordsize Ns = Ns Sy 0 Ns | Ns 1 Pq ulong -Setting this variable overrides the default logic for estimating block -sizes when doing a -.Nm zfs Cm send . -The default heuristic is that the average block size -will be the current recordsize. -Override this value if most data in your dataset is not of that size -and you require accurate zfs send size estimates. -. -.It Sy zfs_sync_pass_deferred_free Ns = Ns Sy 2 Pq int -Flushing of data to disk is done in passes. -Defer frees starting in this pass. -. -.It Sy zfs_spa_discard_memory_limit Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int -Maximum memory used for prefetching a checkpoint's space map on each -vdev while discarding the checkpoint. -. -.It Sy zfs_special_class_metadata_reserve_pct Ns = Ns Sy 25 Ns % Pq int -Only allow small data blocks to be allocated on the special and dedup vdev -types when the available free space percentage on these vdevs exceeds this value. -This ensures reserved space is available for pool metadata as the -special vdevs approach capacity. -. -.It Sy zfs_sync_pass_dont_compress Ns = Ns Sy 8 Pq int -Starting in this sync pass, disable compression (including of metadata). -With the default setting, in practice, we don't have this many sync passes, -so this has no effect. -.Pp -The original intent was that disabling compression would help the sync passes -to converge. -However, in practice, disabling compression increases -the average number of sync passes; because when we turn compression off, -many blocks' size will change, and thus we have to re-allocate -(not overwrite) them. -It also increases the number of -.Em 128kB -allocations (e.g. for indirect blocks and spacemaps) -because these will not be compressed. -The -.Em 128kB -allocations are especially detrimental to performance -on highly fragmented systems, which may have very few free segments of this size, -and may need to load new metaslabs to satisfy these allocations. -. -.It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq int -Rewrite new block pointers starting in this pass. -. -.It Sy zfs_sync_taskq_batch_pct Ns = Ns Sy 75 Ns % Pq int -This controls the number of threads used by -.Sy dp_sync_taskq . -The default value of -.Sy 75% -will create a maximum of one thread per CPU. -. -.It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128MB Pc Pq uint -Maximum size of TRIM command. -Larger ranges will be split into chunks no larger than this value before issuing. -. -.It Sy zfs_trim_extent_bytes_min Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq uint -Minimum size of TRIM commands. -TRIM ranges smaller than this will be skipped, -unless they're part of a larger range which was chunked. -This is done because it's common for these small TRIMs -to negatively impact overall performance. -. -.It Sy zfs_trim_metaslab_skip Ns = Ns Sy 0 Ns | Ns 1 Pq uint -Skip uninitialized metaslabs during the TRIM process. -This option is useful for pools constructed from large thinly-provisioned devices -where TRIM operations are slow. -As a pool ages, an increasing fraction of the pool's metaslabs -will be initialized, progressively degrading the usefulness of this option. -This setting is stored when starting a manual TRIM and will -persist for the duration of the requested TRIM. -. -.It Sy zfs_trim_queue_limit Ns = Ns Sy 10 Pq uint -Maximum number of queued TRIMs outstanding per leaf vdev. 
-The number of concurrent TRIM commands issued to the device is controlled by
-.Sy zfs_vdev_trim_min_active No and Sy zfs_vdev_trim_max_active .
-.
-.It Sy zfs_trim_txg_batch Ns = Ns Sy 32 Pq uint
-The number of transaction groups' worth of frees which should be aggregated
-before TRIM operations are issued to the device.
-This setting represents a trade-off between issuing larger,
-more efficient TRIM operations and the delay
-before the recently trimmed space is available for use by the device.
-.Pp
-Increasing this value will allow frees to be aggregated for a longer time.
-This will result in larger TRIM operations and potentially increased memory usage.
-Decreasing this value will have the opposite effect.
-The default of
-.Sy 32
-was determined to be a reasonable compromise.
-.
-.It Sy zfs_txg_history Ns = Ns Sy 0 Pq int
-Historical statistics for this many latest TXGs will be available in
-.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /TXGs .
-.
-.It Sy zfs_txg_timeout Ns = Ns Sy 5 Ns s Pq int
-Flush dirty data to disk at least every this many seconds (maximum TXG duration).
-.
-.It Sy zfs_vdev_aggregate_trim Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Allow TRIM I/Os to be aggregated.
-This is normally not helpful because the extents to be trimmed
-will already have been aggregated by the metaslab.
-This option is provided for debugging and performance analysis.
-.
-.It Sy zfs_vdev_aggregation_limit Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
-Max vdev I/O aggregation size.
-.
-.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq int
-Max vdev I/O aggregation size for non-rotating media.
-.
-.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64kB Pc Pq int
-Shift size to inflate reads to.
-.
-.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16kB Pc Pq int
-Inflate reads smaller than this value to meet the
-.Sy zfs_vdev_cache_bshift
-size
-.Pq default Sy 64kB .
-.
-.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq int
-Total size of the per-disk cache in bytes.
-.Pp
-Currently this feature is disabled, as it has been found to not be helpful
-for performance and in some cases harmful.
-.
-.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member when an I/O operation
-immediately follows its predecessor on rotational vdevs.
-.
-.It Sy zfs_vdev_mirror_rotating_seek_inc Ns = Ns Sy 5 Pq int
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member when an I/O operation
-lacks locality as defined by
-.Sy zfs_vdev_mirror_rotating_seek_offset .
-Operations within this window that do not immediately follow the previous
-operation are incremented by half.
-.
-.It Sy zfs_vdev_mirror_rotating_seek_offset Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
-The maximum distance for the last queued I/O operation in which
-the balancing algorithm considers an operation to have locality.
-.No See Sx ZFS I/O SCHEDULER .
-.
-.It Sy zfs_vdev_mirror_non_rotating_inc Ns = Ns Sy 0 Pq int
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member on non-rotational vdevs
-when I/O operations do not immediately follow one another.
-.
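-.Pp
-Like the other module parameters documented here, this one can be inspected,
-and (where writable) adjusted at runtime, through
-.Pa /sys/module/zfs/parameters
-on Linux.
-A sketch (the value shown is merely the documented default):
-.Bd -literal -compact -offset 4n
-# cat /sys/module/zfs/parameters/zfs_vdev_mirror_non_rotating_inc
-0
-# echo 1 > /sys/module/zfs/parameters/zfs_vdev_mirror_non_rotating_inc
-.Ed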
-.It Sy zfs_vdev_mirror_non_rotating_seek_inc Ns = Ns Sy 1 Pq int
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member when an I/O operation lacks
-locality as defined by
-.Sy zfs_vdev_mirror_rotating_seek_offset .
-Operations within this window that do not immediately follow the previous
-operation are incremented by half.
-.
-.It Sy zfs_vdev_read_gap_limit Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int
-Aggregate read I/O operations if the on-disk gap between them is within this
-threshold.
-.
-.It Sy zfs_vdev_write_gap_limit Ns = Ns Sy 4096 Ns B Po 4kB Pc Pq int
-Aggregate write I/O operations if the on-disk gap between them is within this
-threshold.
-.
-.It Sy zfs_vdev_raidz_impl Ns = Ns Sy fastest Pq string
-Select the raidz parity implementation to use.
-.Pp
-Variants that don't depend on CPU-specific features
-may be selected on module load, as they are supported on all systems.
-The remaining options may only be set after the module is loaded,
-as they are available only if the implementations are compiled in
-and supported on the running system.
-.Pp
-Once the module is loaded,
-.Pa /sys/module/zfs/parameters/zfs_vdev_raidz_impl
-will show the available options,
-with the currently selected one enclosed in square brackets.
-.Pp
-.TS
-lb l l .
-fastest	selected by built-in benchmark
-original	original implementation
-scalar	scalar implementation
-sse2	SSE2 instruction set	64-bit x86
-ssse3	SSSE3 instruction set	64-bit x86
-avx2	AVX2 instruction set	64-bit x86
-avx512f	AVX512F instruction set	64-bit x86
-avx512bw	AVX512F & AVX512BW instruction sets	64-bit x86
-aarch64_neon	NEON	Aarch64/64-bit ARMv8
-aarch64_neonx2	NEON with more unrolling	Aarch64/64-bit ARMv8
-powerpc_altivec	Altivec	PowerPC
-.TE
-.
-.It Sy zfs_vdev_scheduler Pq charp
-.Sy DEPRECATED .
-Prints warning to kernel log for compatibility.
-.
-.It Sy zfs_zevent_len_max Ns = Ns Sy 512 Pq int
-Max event queue length.
-Events in the queue can be viewed with
-.Xr zpool-events 8 .
-.
-.It Sy zfs_zevent_retain_max Ns = Ns Sy 2000 Pq int
-Maximum recent zevent records to retain for duplicate checking.
-Setting this to
-.Sy 0
-disables duplicate detection.
-.
-.It Sy zfs_zevent_retain_expire_secs Ns = Ns Sy 900 Ns s Po 15min Pc Pq int
-Lifespan for a recent ereport that was retained for duplicate checking.
-.
-.It Sy zfs_zil_clean_taskq_maxalloc Ns = Ns Sy 1048576 Pq int
-The maximum number of taskq entries that are allowed to be cached.
-When this limit is exceeded, transaction records (itxs)
-will be cleaned synchronously.
-.
-.It Sy zfs_zil_clean_taskq_minalloc Ns = Ns Sy 1024 Pq int
-The number of taskq entries that are pre-populated when the taskq is first
-created and are immediately available for use.
-.
-.It Sy zfs_zil_clean_taskq_nthr_pct Ns = Ns Sy 100 Ns % Pq int
-This controls the number of threads used by
-.Sy dp_zil_clean_taskq .
-The default value of
-.Sy 100%
-will create a maximum of one thread per CPU.
-.
-.It Sy zil_maxblocksize Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq int
-This sets the maximum block size used by the ZIL.
-On very fragmented pools, lowering this
-.Pq typically to Sy 36kB
-can improve performance.
-.
-.It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable the cache flush commands that are normally sent to disk by
-the ZIL after an LWB write has completed.
-Setting this will cause ZIL corruption on power loss
-if a volatile out-of-order write cache is enabled.
-.
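-.Pp
-Parameters such as these can also be set persistently, at module load time,
-via
-.Xr modprobe.d 5 .
-A sketch (the file name is illustrative; the values shown are merely the
-documented default and a lowered
-.Sy zil_maxblocksize No of 36kB):
-.Bd -literal -compact -offset 4n
-# cat /etc/modprobe.d/zfs.conf
-options zfs zil_nocacheflush=0 zil_maxblocksize=36864
-.Ed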
-.It Sy zil_replay_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Disable intent logging replay.
-This can be useful when recovering from a corrupted ZIL.
-.
-.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768kB Pc Pq ulong
-Limit SLOG write size per commit executed with synchronous priority.
-Any writes above that will be executed with lower (asynchronous) priority
-to limit potential SLOG device abuse by a single active ZIL writer.
-.
-.It Sy zfs_embedded_slog_min_ms Ns = Ns Sy 64 Pq int
-Usually, one metaslab from each normal-class vdev is dedicated for use by
-the ZIL to log synchronous writes.
-However, if there are fewer than
-.Sy zfs_embedded_slog_min_ms
-metaslabs in the vdev, this functionality is disabled.
-This ensures that we don't set aside an unreasonable amount of space for the ZIL.
-.
-.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
-If non-zero, the zio deadman will produce debugging messages
-.Pq see Sy zfs_dbgmsg_enable
-for all zios, rather than only for leaf zios possessing a vdev.
-This is meant to be used by developers to gain
-diagnostic information for hang conditions which don't involve a mutex
-or other locking primitive: typically conditions in which a thread in
-the zio pipeline is looping indefinitely.
-.
-.It Sy zio_slow_io_ms Ns = Ns Sy 30000 Ns ms Po 30s Pc Pq int
-When an I/O operation takes more than this much time to complete,
-it's marked as slow.
-Each slow operation causes a delay zevent.
-Slow I/O counters can be seen with
-.Nm zpool Cm status Fl s ,
-as sketched in the example below.
-.
-.It Sy zio_dva_throttle_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
-Throttle block allocations in the I/O pipeline.
-This allows for dynamic allocation distribution when devices are imbalanced.
-When enabled, the maximum number of pending allocations per top-level vdev
-is limited by
-.Sy zfs_vdev_queue_depth_pct .
-.
-.It Sy zio_requeue_io_start_cut_in_line Ns = Ns Sy 0 Ns | Ns 1 Pq int
-Prioritize requeued I/O.
-.
-.It Sy zio_taskq_batch_pct Ns = Ns Sy 80 Ns % Pq uint
-Percentage of online CPUs which will run a worker thread for I/O.
-These workers are responsible for I/O work such as compression and
-checksum calculations.
-A fractional number of CPUs will be rounded down.
-.Pp
-The default value of
-.Sy 80%
-was chosen to avoid using all CPUs which can result in
-latency issues and inconsistent application performance,
-especially when slower compression and/or checksumming is enabled.
-.
-.It Sy zio_taskq_batch_tpq Ns = Ns Sy 0 Pq uint
-Number of worker threads per taskq.
-Lower values improve I/O ordering and CPU utilization,
-while higher values reduce lock contention.
-.Pp
-If
-.Sy 0 ,
-generate a system-dependent value close to 6 threads per taskq.
-.
-.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
-Do not create zvol device nodes.
-This may slightly improve startup time on
-systems with a very large number of zvols.
-.
-.It Sy zvol_major Ns = Ns Sy 230 Pq uint
-Major number for zvol block devices.
-.
-.It Sy zvol_max_discard_blocks Ns = Ns Sy 16384 Pq ulong
-Discard (TRIM) operations done on zvols will be done in batches of this
-many blocks, where block size is determined by the
-.Sy volblocksize
-property of a zvol.
-.
-.It Sy zvol_prefetch_bytes Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq uint
-When adding a zvol to the system, prefetch this many bytes
-from the start and end of the volume.
-Prefetching these regions of the volume is desirable,
-because they are likely to be accessed immediately by
-.Xr blkid 8
-or the kernel partitioner.
-.
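-.Pp
-As referenced under
-.Sy zio_slow_io_ms
-above, the per-vdev slow I/O counters can be inspected with
-.Nm zpool Cm status Fl s .
-A sketch (the pool layout and counts are illustrative):
-.Bd -literal -compact -offset 4n
-# zpool status -s tank
-        NAME        STATE     READ WRITE CKSUM  SLOW
-        tank        ONLINE       0     0     0     -
-          mirror-0  ONLINE       0     0     0     -
-            sda     ONLINE       0     0     0     0
-            sdb     ONLINE       0     0     0     5
-.Ed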
-.It Sy zvol_request_sync Ns = Ns Sy 0 Ns | Ns 1 Pq uint
-When processing I/O requests for a zvol, submit them synchronously.
-This effectively limits the queue depth to
-.Em 1
-for each I/O submitter.
-When unset, requests are handled asynchronously by a thread pool.
-The number of requests which can be handled concurrently is controlled by
-.Sy zvol_threads .
-.
-.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
-Max number of threads which can handle zvol I/O requests concurrently.
-.
-.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
-Defines the behaviour of zvol block devices when
-.Sy volmode Ns = Ns Sy default :
-.Bl -tag -compact -offset 4n -width "a"
-.It Sy 1
-.No equivalent to Sy full
-.It Sy 2
-.No equivalent to Sy dev
-.It Sy 3
-.No equivalent to Sy none
-.El
-.El
-.
-.Sh ZFS I/O SCHEDULER
-ZFS issues I/O operations to leaf vdevs to satisfy and complete I/O requests.
-The scheduler determines when and in what order those operations are issued.
-The scheduler divides operations into five I/O classes,
-prioritized in the following order: sync read, sync write, async read,
-async write, and scrub/resilver.
-Each queue defines the minimum and maximum number of concurrent operations
-that may be issued to the device.
-In addition, the device has an aggregate maximum,
-.Sy zfs_vdev_max_active .
-Note that the sum of the per-queue minima must not exceed the aggregate maximum.
-If the sum of the per-queue maxima exceeds the aggregate maximum,
-then the number of active operations may reach
-.Sy zfs_vdev_max_active ,
-in which case no further operations will be issued,
-regardless of whether all per-queue minima have been met.
-.Pp
-For many physical devices, throughput increases with the number of
-concurrent operations, but latency typically suffers.
-Furthermore, physical devices typically have a limit
-at which more concurrent operations have no
-effect on throughput or can actually cause it to decrease.
-.Pp
-The scheduler selects the next operation to issue by first looking for an
-I/O class whose minimum has not been satisfied.
-Once all are satisfied and the aggregate maximum has not been hit,
-the scheduler looks for classes whose maximum has not been satisfied.
-Iteration through the I/O classes is done in the order specified above.
-No further operations are issued
-if the aggregate maximum number of concurrent operations has been hit,
-or if there are no operations queued for an I/O class that has not hit its maximum.
-Every time an I/O operation is queued or an operation completes,
-the scheduler looks for new operations to issue.
-.Pp
-In general, smaller
-.Sy max_active Ns s
-will lead to lower latency of synchronous operations.
-Larger
-.Sy max_active Ns s
-may lead to higher overall throughput, depending on underlying storage.
-.Pp
-The ratio of the queues'
-.Sy max_active Ns s
-determines the balance of performance between reads, writes, and scrubs.
-For example, increasing
-.Sy zfs_vdev_scrub_max_active
-will cause the scrub or resilver to complete more quickly,
-but reads and writes to have higher latency and lower throughput.
-.Pp
-All I/O classes have a fixed maximum number of outstanding operations,
-except for the async write class.
-Asynchronous writes represent the data that is committed to stable storage
-during the syncing stage for transaction groups.
-Transaction groups enter the syncing state periodically,
-so the number of queued async writes will quickly burst up
-and then bleed down to zero.
-Rather than servicing them as quickly as possible, -the I/O scheduler changes the maximum number of active async write operations -according to the amount of dirty data in the pool. -Since both throughput and latency typically increase with the number of -concurrent operations issued to physical devices, reducing the -burstiness in the number of concurrent operations also stabilizes the -response time of operations from other – and in particular synchronous – queues. -In broad strokes, the I/O scheduler will issue more concurrent operations -from the async write queue as there's more dirty data in the pool. -. -.Ss Async Writes -The number of concurrent operations issued for the async write I/O class -follows a piece-wise linear function defined by a few adjustable points: -.Bd -literal - | o---------| <-- \fBzfs_vdev_async_write_max_active\fP - ^ | /^ | - | | / | | -active | / | | - I/O | / | | -count | / | | - | / | | - |-------o | | <-- \fBzfs_vdev_async_write_min_active\fP - 0|_______^______|_________| - 0% | | 100% of \fBzfs_dirty_data_max\fP - | | - | `-- \fBzfs_vdev_async_write_active_max_dirty_percent\fP - `--------- \fBzfs_vdev_async_write_active_min_dirty_percent\fP -.Ed -.Pp -Until the amount of dirty data exceeds a minimum percentage of the dirty -data allowed in the pool, the I/O scheduler will limit the number of -concurrent operations to the minimum. -As that threshold is crossed, the number of concurrent operations issued -increases linearly to the maximum at the specified maximum percentage -of the dirty data allowed in the pool. -.Pp -Ideally, the amount of dirty data on a busy pool will stay in the sloped -part of the function between -.Sy zfs_vdev_async_write_active_min_dirty_percent -and -.Sy zfs_vdev_async_write_active_max_dirty_percent . -If it exceeds the maximum percentage, -this indicates that the rate of incoming data is -greater than the rate that the backend storage can handle. -In this case, we must further throttle incoming writes, -as described in the next section. -. -.Sh ZFS TRANSACTION DELAY -We delay transactions when we've determined that the backend storage -isn't able to accommodate the rate of incoming writes. -.Pp -If there is already a transaction waiting, we delay relative to when -that transaction will finish waiting. -This way the calculated delay time -is independent of the number of threads concurrently executing transactions. -.Pp -If we are the only waiter, wait relative to when the transaction started, -rather than the current time. -This credits the transaction for "time already served", -e.g. reading indirect blocks. -.Pp -The minimum time for a transaction to take is calculated as -.Dl min_time = min( Ns Sy zfs_delay_scale No * (dirty - min) / (max - dirty), 100ms) -.Pp -The delay has two degrees of freedom that can be adjusted via tunables. -The percentage of dirty data at which we start to delay is defined by -.Sy zfs_delay_min_dirty_percent . -This should typically be at or above -.Sy zfs_vdev_async_write_active_max_dirty_percent , -so that we only start to delay after writing at full speed -has failed to keep up with the incoming write rate. -The scale of the curve is defined by -.Sy zfs_delay_scale . -Roughly speaking, this variable determines the amount of delay at the midpoint of the curve. 
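-.Pp
-As a worked sketch, assume
-.Sy zfs_delay_scale
-is at its default of
-.Sy 500000
-(nanoseconds), and that the amount of dirty data sits exactly halfway between
-the point where delays begin and
-.Sy zfs_dirty_data_max ,
-so that (dirty - min) equals (max - dirty):
-.Bd -literal -compact -offset 4n
-min_time = min(500000ns * 1, 100ms) = 500us, i.e. roughly 2000 IOPS
-.Ed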
-.Bd -literal -delay - 10ms +-------------------------------------------------------------*+ - | *| - 9ms + *+ - | *| - 8ms + *+ - | * | - 7ms + * + - | * | - 6ms + * + - | * | - 5ms + * + - | * | - 4ms + * + - | * | - 3ms + * + - | * | - 2ms + (midpoint) * + - | | ** | - 1ms + v *** + - | \fBzfs_delay_scale\fP ----------> ******** | - 0 +-------------------------------------*********----------------+ - 0% <- \fBzfs_dirty_data_max\fP -> 100% -.Ed -.Pp -Note, that since the delay is added to the outstanding time remaining on the -most recent transaction it's effectively the inverse of IOPS. -Here, the midpoint of -.Em 500us -translates to -.Em 2000 IOPS . -The shape of the curve -was chosen such that small changes in the amount of accumulated dirty data -in the first three quarters of the curve yield relatively small differences -in the amount of delay. -.Pp -The effects can be easier to understand when the amount of delay is -represented on a logarithmic scale: -.Bd -literal -delay -100ms +-------------------------------------------------------------++ - + + - | | - + *+ - 10ms + *+ - + ** + - | (midpoint) ** | - + | ** + - 1ms + v **** + - + \fBzfs_delay_scale\fP ----------> ***** + - | **** | - + **** + -100us + ** + - + * + - | * | - + * + - 10us + * + - + + - | | - + + - +--------------------------------------------------------------+ - 0% <- \fBzfs_dirty_data_max\fP -> 100% -.Ed -.Pp -Note here that only as the amount of dirty data approaches its limit does -the delay start to increase rapidly. -The goal of a properly tuned system should be to keep the amount of dirty data -out of that range by first ensuring that the appropriate limits are set -for the I/O scheduler to reach optimal throughput on the back-end storage, -and then by changing the value of -.Sy zfs_delay_scale -to increase the steepness of the curve. diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5 deleted file mode 100644 index 2b4cee545e88..000000000000 --- a/man/man5/zpool-features.5 +++ /dev/null @@ -1,842 +0,0 @@ -.\" -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. -.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. -.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. -.\" The contents of this file are subject to the terms of the Common Development -.\" and Distribution License (the "License"). You may not use this file except -.\" in compliance with the License. You can obtain a copy of the license at -.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" -.\" See the License for the specific language governing permissions and -.\" limitations under the License. When distributing Covered Code, include this -.\" CDDL HEADER in each file and include the License file at -.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this -.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your -.\" own identifying information: -.\" Portions Copyright [yyyy] [name of copyright owner] -.\" Copyright (c) 2019, Klara Inc. -.\" Copyright (c) 2019, Allan Jude -.\" Copyright (c) 2021, Colm Buckley -.\" -.Dd May 31, 2021 -.Dt ZPOOL-FEATURES 5 -.Os -. -.Sh NAME -.Nm zpool-features -.Nd description of ZFS pool features -. -.Sh DESCRIPTION -ZFS pool on-disk format versions are specified via "features" which replace -the old on-disk format numbers (the last supported on-disk format number is 28). 
-To enable a feature on a pool, use the
-.Nm zpool Cm upgrade
-command, or set the
-.Sy feature Ns @ Ns Ar feature-name
-property to
-.Sy enabled .
-Please also see the
-.Sx Compatibility feature sets
-section for information on how sets of features may be enabled together.
-.Pp
-The pool format does not affect file system version compatibility or the ability
-to send file systems between pools.
-.Pp
-Since most features can be enabled independently of each other, the on-disk
-format of the pool is specified by the set of all features marked as
-.Sy active
-on the pool.
-If the pool was created by another software version,
-this set may include unsupported features.
-.
-.Ss Identifying features
-Every feature has a GUID of the form
-.Ar com.example : Ns Ar feature-name .
-The reversed DNS name ensures that the feature's GUID is unique across all ZFS
-implementations.
-When unsupported features are encountered on a pool, they will
-be identified by their GUIDs.
-Refer to the documentation for the ZFS
-implementation that created the pool for information about those features.
-.Pp
-Each supported feature also has a short name.
-By convention, a feature's short name is the portion of its GUID which follows the
-.Sq \&:
-(i.e.
-.Ar com.example : Ns Ar feature-name
-would have the short name
-.Ar feature-name ) ;
-however, a feature's short name may differ across ZFS implementations if
-following the convention would result in name conflicts.
-.
-.Ss Feature states
-Features can be in one of three states:
-.Bl -tag -width "disabled"
-.It Sy active
-This feature's on-disk format changes are in effect on the pool.
-Support for this feature is required to import the pool in read-write mode.
-If this feature is not read-only compatible,
-support is also required to import the pool in read-only mode
-.Pq see Sx Read-only compatibility .
-.It Sy enabled
-An administrator has marked this feature as enabled on the pool, but the
-feature's on-disk format changes have not been made yet.
-The pool can still be imported by software that does not support this feature,
-but changes may be made to the on-disk format at any time
-which will move the feature to the
-.Sy active
-state.
-Some features may support returning to the
-.Sy enabled
-state after becoming
-.Sy active .
-See feature-specific documentation for details.
-.It Sy disabled
-This feature's on-disk format changes have not been made and will not be made
-unless an administrator moves the feature to the
-.Sy enabled
-state.
-Features cannot be disabled once they have been enabled.
-.El
-.Pp
-The state of supported features is exposed through pool properties of the form
-.Sy feature Ns @ Ns Ar short-name .
-.
-.Ss Read-only compatibility
-Some features may make on-disk format changes that do not interfere with other
-software's ability to read from the pool.
-These features are referred to as
-.Dq read-only compatible .
-If all unsupported features on a pool are read-only compatible,
-the pool can be imported in read-only mode by setting the
-.Sy readonly
-property during import (see
-.Xr zpool-import 8
-for details on importing pools).
-.
-.Ss Unsupported features
-For each unsupported feature enabled on an imported pool, a pool property
-named
-.Sy unsupported Ns @ Ns Ar feature-name
-will indicate why the import was allowed despite the unsupported feature.
-Possible values for this property are:
-.Bl -tag -width "readonly"
-.It Sy inactive
-The feature is in the
-.Sy enabled
-state and therefore the pool's on-disk
-format is still compatible with software that does not support this feature.
-.It Sy readonly
-The feature is read-only compatible and the pool has been imported in
-read-only mode.
-.El
-.
-.Ss Feature dependencies
-Some features depend on other features being enabled in order to function.
-Enabling a feature will automatically enable any features it depends on.
-.
-.Ss Compatibility feature sets
-It is sometimes necessary for a pool to maintain compatibility with a
-specific on-disk format by enabling and disabling particular features.
-The
-.Sy compatibility
-property facilitates this by allowing feature sets to be read from text files.
-When set to
-.Sy off
-(the default), compatibility feature sets are disabled
-(i.e. all features are enabled); when set to
-.Sy legacy ,
-no features are enabled.
-When set to a comma-separated list of filenames
-(each filename may either be an absolute path, or relative to
-.Pa /etc/zfs/compatibility.d
-or
-.Pa /usr/share/zfs/compatibility.d ) ,
-the lists of requested features are read from those files,
-separated by whitespace and/or commas.
-Only features present in all files are enabled.
-.Pp
-Simple sanity checks are applied to the files:
-they must be between 1B and 16kB in size, and must end with a newline character.
-.Pp
-The requested features are applied when a pool is created using
-.Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar … ,
-and control which features are enabled when using
-.Nm zpool Cm upgrade .
-.Nm zpool Cm status
-will not show a warning about disabled features which are not part
-of the requested feature set.
-.Pp
-The special value
-.Sy legacy
-prevents any features from being enabled, either via
-.Nm zpool Cm upgrade
-or
-.Nm zpool Cm set Sy feature Ns @ Ns Ar feature-name Ns = Ns Sy enabled .
-This setting also prevents pools from being upgraded to newer on-disk versions.
-This is a safety measure to prevent new features from being
-accidentally enabled, breaking compatibility.
-.Pp
-By convention, compatibility files in
-.Pa /usr/share/zfs/compatibility.d
-are provided by the distribution, and include feature sets
-supported by important versions of popular distributions, and feature
-sets commonly supported at the start of each year.
-Compatibility files in
-.Pa /etc/zfs/compatibility.d ,
-if present, will take precedence over files with the same name in
-.Pa /usr/share/zfs/compatibility.d .
-.Pp
-If an unrecognized feature is found in these files, an error message will
-be shown.
-If the unrecognized feature is in a file in
-.Pa /etc/zfs/compatibility.d ,
-this is treated as an error and processing will stop.
-If the unrecognized feature is under
-.Pa /usr/share/zfs/compatibility.d ,
-this is treated as a warning and processing will continue.
-This difference is to allow distributions to include features
-which might not be recognized by the currently-installed binaries.
-.Pp
-Compatibility files may include comments:
-any text from
-.Sq #
-to the end of the line is ignored.
-.Pp -.Sy Example : -.Bd -literal -compact -offset 4n -.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2 -# Features which are supported by GRUB2 -async_destroy -bookmarks -embedded_data -empty_bpobj -enabled_txg -extensible_dataset -filesystem_limits -hole_birth -large_blocks -lz4_compress -spacemap_histogram - -.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev -.Ed -.Pp -See -.Xr zpool-create 8 -and -.Xr zpool-upgrade 8 -for more information on how these commands are affected by feature sets. -. -.de feature -.It Sy \\$2 -.Bl -tag -compact -width "READ-ONLY COMPATIBLE" -.It GUID -.Sy \\$1:\\$2 -.if !"\\$4"" \{\ -.It DEPENDENCIES -\fB\\$4\fP\c -.if !"\\$5"" , \fB\\$5\fP\c -.if !"\\$6"" , \fB\\$6\fP\c -.if !"\\$7"" , \fB\\$7\fP\c -.if !"\\$8"" , \fB\\$8\fP\c -.if !"\\$9"" , \fB\\$9\fP\c -.\} -.It READ-ONLY COMPATIBLE -\\$3 -.El -.Pp -.. -. -.ds instant-never \ -.No This feature becomes Sy active No as soon as it is enabled \ -and will never return to being Sy enabled . -. -.ds remount-upgrade \ -.No Each filesystem will be upgraded automatically when remounted, \ -or when a new file is created under that filesystem. \ -The upgrade can also be triggered on filesystems via \ -Nm zfs Cm set Sy version Ns = Ns Sy current Ar fs . \ -No The upgrade process runs in the background and may take a while to complete \ -for filesystems containing large amounts of files. -. -.de checksum-spiel -When the -.Sy \\$1 -feature is set to -.Sy enabled , -the administrator can turn on the -.Sy \\$1 -checksum on any dataset using -.Nm zfs Cm set Sy checksum Ns = Ns Sy \\$1 Ar dset -.Po see Xr zfs-set 8 Pc . -This feature becomes -.Sy active -once a -.Sy checksum -property has been set to -.Sy \\$1 , -and will return to being -.Sy enabled -once all filesystems that have ever had their checksum set to -.Sy \\$1 -are destroyed. -.. -. -.Sh FEATURES -The following features are supported on this system: -.Bl -tag -width Ds -.feature org.zfsonlinux allocation_classes yes -This feature enables support for separate allocation classes. -.Pp -This feature becomes -.Sy active -when a dedicated allocation class vdev (dedup or special) is created with the -.Nm zpool Cm create No or Nm zpool Cm add No commands . -With device removal, it can be returned to the -.Sy enabled -state if all the dedicated allocation class vdevs are removed. -. -.feature com.delphix async_destroy yes -Destroying a file system requires traversing all of its data in order to -return its used space to the pool. -Without -.Sy async_destroy , -the file system is not fully removed until all space has been reclaimed. -If the destroy operation is interrupted by a reboot or power outage, -the next attempt to open the pool will need to complete the destroy -operation synchronously. -.Pp -When -.Sy async_destroy -is enabled, the file system's data will be reclaimed by a background process, -allowing the destroy operation to complete -without traversing the entire file system. -The background process is able to resume -interrupted destroys after the pool has been opened, eliminating the need -to finish interrupted destroys as part of the open operation. -The amount of space remaining to be reclaimed by the background process -is available through the -.Sy freeing -property. -.Pp -This feature is only -.Sy active -while -.Sy freeing -is non-zero. -. -.feature com.delphix bookmarks yes extensible_dataset -This feature enables use of the -.Nm zfs Cm bookmark -command. 
-.Pp -This feature is -.Sy active -while any bookmarks exist in the pool. -All bookmarks in the pool can be listed by running -.Nm zfs Cm list Fl t Sy bookmark Fl r Ar poolname . -. -.feature com.datto bookmark_v2 no bookmark extensible_dataset -This feature enables the creation and management of larger bookmarks which are -needed for other features in ZFS. -.Pp -This feature becomes -.Sy active -when a v2 bookmark is created and will be returned to the -.Sy enabled -state when all v2 bookmarks are destroyed. -. -.feature com.delphix bookmark_written no bookmark extensible_dataset bookmark_v2 -This feature enables additional bookmark accounting fields, enabling the -.Sy written Ns # Ns Ar bookmark -property (space written since a bookmark) and estimates of -send stream sizes for incrementals from bookmarks. -.Pp -This feature becomes -.Sy active -when a bookmark is created and will be -returned to the -.Sy enabled -state when all bookmarks with these fields are destroyed. -. -.feature org.openzfs device_rebuild yes -This feature enables the ability for the -.Nm zpool Cm attach -and -.Nm zpool Cm replace -commands to perform sequential reconstruction -(instead of healing reconstruction) when resilvering. -.Pp -Sequential reconstruction resilvers a device in LBA order without immediately -verifying the checksums. -Once complete, a scrub is started, which then verifies the checksums. -This approach allows full redundancy to be restored to the pool -in the minimum amount of time. -This two-phase approach will take longer than a healing resilver -when the time to verify the checksums is included. -However, unless there is additional pool damage, -no checksum errors should be reported by the scrub. -This feature is incompatible with raidz configurations. -. -This feature becomes -.Sy active -while a sequential resilver is in progress, and returns to -.Sy enabled -when the resilver completes. -. -.feature com.delphix device_removal no -This feature enables the -.Nm zpool Cm remove -command to remove top-level vdevs, -evacuating them to reduce the total size of the pool. -.Pp -This feature becomes -.Sy active -when the -.Nm zpool Cm remove -command is used -on a top-level vdev, and will never return to being -.Sy enabled . -. -.feature org.openzfs draid no -This feature enables use of the -.Sy draid -vdev type. -dRAID is a variant of raidz which provides integrated distributed -hot spares that allow faster resilvering while retaining the benefits of raidz. -Data, parity, and spare space are organized in redundancy groups -and distributed evenly over all of the devices. -.Pp -This feature becomes -.Sy active -when creating a pool which uses the -.Sy draid -vdev type, or when adding a new -.Sy draid -vdev to an existing pool. -. -.feature org.illumos edonr no extensible_dataset -This feature enables the use of the Edon-R hash algorithm for checksum, -including for nopwrite (if compression is also enabled, an overwrite of -a block whose checksum matches the data being written will be ignored). -In an abundance of caution, Edon-R requires verification when used with -dedup: -.Nm zfs Cm set Sy dedup Ns = Ns Sy edonr , Ns Sy verify -.Po see Xr zfs-set 8 Pc . -.Pp -Edon-R is a very high-performance hash algorithm that was part -of the NIST SHA-3 competition. -It provides extremely high hash performance (over 350% faster than SHA-256), -but was not selected because of its unsuitability -as a general purpose secure hash algorithm. 
-This implementation utilizes the new salted checksumming functionality
-in ZFS, which means that the checksum is pre-seeded with a secret
-256-bit random key (stored on the pool) before being fed the data block
-to be checksummed.
-Thus the produced checksums are unique to a given pool,
-preventing hash collision attacks on systems with dedup.
-.Pp
-.checksum-spiel edonr
-.Pp
-.Fx does not support the Sy edonr No feature.
-.
-.feature com.delphix embedded_data no
-This feature improves the performance and compression ratio of
-highly-compressible blocks.
-Blocks whose contents can compress to 112 bytes
-or smaller can take advantage of this feature.
-.Pp
-When this feature is enabled, the contents of highly-compressible blocks are
-stored in the block "pointer" itself (a misnomer in this case, as it contains
-the compressed data, rather than a pointer to its location on disk).
-Thus the space of the block (one sector, typically 512B or 4kB) is saved,
-and no additional I/O is needed to read and write the data block.
-.Pp
-\*[instant-never]
-.
-.feature com.delphix empty_bpobj yes
-This feature increases the performance of creating and using a large
-number of snapshots of a single filesystem or volume, and also reduces
-the disk space required.
-.Pp
-When there are many snapshots, each snapshot uses many Block Pointer
-Objects (bpobjs) to track blocks associated with that snapshot.
-However, in common use cases, most of these bpobjs are empty.
-This feature allows us to create each bpobj on-demand,
-thus eliminating the empty bpobjs.
-.Pp
-This feature is
-.Sy active
-while there are any filesystems, volumes,
-or snapshots which were created after enabling this feature.
-.
-.feature com.delphix enabled_txg yes
-Once this feature is enabled, ZFS records the transaction group number
-in which new features are enabled.
-This has no user-visible impact, but other features may depend on this feature.
-.Pp
-This feature becomes
-.Sy active
-as soon as it is enabled and will
-never return to being
-.Sy enabled .
-.
-.feature com.datto encryption no bookmark_v2 extensible_dataset
-This feature enables the creation and management of natively encrypted datasets.
-.Pp
-This feature becomes
-.Sy active
-when an encrypted dataset is created and will be returned to the
-.Sy enabled
-state when all datasets that use this feature are destroyed.
-.
-.feature com.delphix extensible_dataset no
-This feature allows more flexible use of internal ZFS data structures,
-and exists for other features to depend on.
-.Pp
-This feature will be
-.Sy active
-when the first dependent feature uses it, and will be returned to the
-.Sy enabled
-state when all datasets that use this feature are destroyed.
-.
-.feature com.joyent filesystem_limits yes extensible_dataset
-This feature enables filesystem and snapshot limits.
-These limits can be used to control how many filesystems and/or snapshots
-can be created at the point in the tree on which the limits are set.
-.Pp
-This feature is
-.Sy active
-once either of the limit properties has been set on a dataset.
-Once activated, the feature is never deactivated.
-.
-.feature com.delphix hole_birth no enabled_txg
-This feature has/had bugs, the result of which is that, if you do a
-.Nm zfs Cm send Fl i
-.Pq or Fl R , No since it uses Fl i
-from an affected dataset, the receiving party will not see any checksum
-or other errors, but the resulting destination snapshot
-will not match the source.
-Its use by
-.Nm zfs Cm send Fl i
-has been disabled by default
-.Pq see Sy send_holes_without_birth_time No in Xr zfs-module-parameters 5 .
-.Pp
-This feature improves performance of incremental sends
-.Pq Nm zfs Cm send Fl i
-and receives for objects with many holes.
-The most common case of hole-filled objects is zvols.
-.Pp
-An incremental send stream from snapshot
-.Sy A No to snapshot Sy B
-contains information about every block that changed between
-.Sy A No and Sy B .
-Blocks which did not change between those snapshots can be
-identified and omitted from the stream using a piece of metadata called
-the "block birth time", but birth times are not recorded for holes
-(blocks filled only with zeroes).
-Since holes created after
-.Sy A No cannot be distinguished from holes created before Sy A ,
-information about every hole in the entire filesystem or zvol
-is included in the send stream.
-.Pp
-For workloads where holes are rare, this is not a problem.
-However, when incrementally replicating filesystems or zvols with many holes
-(for example, a zvol formatted with another filesystem), a lot of time will
-be spent sending and receiving unnecessary information about holes that
-already exist on the receiving side.
-.Pp
-Once the
-.Sy hole_birth
-feature has been enabled, the block birth times
-of all new holes will be recorded.
-Incremental sends between snapshots created after this feature is enabled
-will use this new metadata to avoid sending information about holes that
-already exist on the receiving side.
-.Pp
-\*[instant-never]
-.
-.feature org.open-zfs large_blocks no extensible_dataset
-This feature allows the record size on a dataset to be set larger than 128kB.
-.Pp
-This feature becomes
-.Sy active
-once a dataset contains a file with a block size larger than 128kB,
-and will return to being
-.Sy enabled
-once all filesystems that have ever had their recordsize larger than 128kB
-are destroyed.
-.
-.feature org.zfsonlinux large_dnode no extensible_dataset
-This feature allows the size of dnodes in a dataset to be set larger than 512B.
-.Pp
-This feature becomes
-.Sy active
-once a dataset contains an object with a dnode larger than 512B,
-which occurs as a result of setting the
-.Sy dnodesize
-dataset property to a value other than
-.Sy legacy .
-The feature will return to being
-.Sy enabled
-once all filesystems that have ever contained a dnode larger than 512B
-are destroyed.
-Large dnodes allow more data to be stored in the bonus buffer,
-thus potentially improving performance by avoiding the use of spill blocks.
-.
-.feature com.delphix livelist yes
-This feature allows clones to be deleted faster than the traditional method
-when a large number of random/sparse writes have been made to the clone.
-All blocks allocated and freed after a clone is created are tracked by
-the clone's livelist, which is referenced during the deletion of the clone.
-The feature is activated when a clone is created and remains
-.Sy active
-until all clones have been destroyed.
-.
-.feature com.delphix log_spacemap yes com.delphix:spacemap_v2
-This feature improves performance for heavily-fragmented pools,
-especially when workloads are heavy in random writes.
-It does so by logging all the metaslab changes on a single spacemap every TXG
-instead of scattering multiple writes to all the metaslab spacemaps.
-.Pp
-\*[instant-never]
-.
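-.Pp
-As with any feature, its state can be read back through the corresponding
-pool property.
-A sketch, with a hypothetical pool named
-.Ar tank :
-.Bd -literal -compact -offset 4n
-# zpool get feature@log_spacemap tank
-NAME  PROPERTY              VALUE    SOURCE
-tank  feature@log_spacemap  active   local
-.Ed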
-.feature org.illumos lz4_compress no
-.Sy lz4
-is a high-performance real-time compression algorithm that
-features significantly faster compression and decompression as well as a
-higher compression ratio than the older
-.Sy lzjb
-compression.
-Typically,
-.Sy lz4
-compression is approximately 50% faster on compressible data and 200% faster
-on incompressible data than
-.Sy lzjb .
-It is also approximately 80% faster on decompression,
-while giving approximately a 10% better compression ratio.
-.Pp
-When the
-.Sy lz4_compress
-feature is set to
-.Sy enabled ,
-the administrator can turn on
-.Sy lz4
-compression on any dataset on the pool using the
-.Xr zfs-set 8
-command.
-All newly written metadata will be compressed with the
-.Sy lz4
-algorithm.
-.Pp
-\*[instant-never]
-.
-.feature com.joyent multi_vdev_crash_dump no
-This feature allows a dump device to be configured with a pool comprised
-of multiple vdevs.
-Those vdevs may be arranged in any mirrored or raidz configuration.
-.Pp
-When the
-.Sy multi_vdev_crash_dump
-feature is set to
-.Sy enabled ,
-the administrator can use
-.Xr dumpadm 1M
-to configure a dump device on a pool comprised of multiple vdevs.
-.Pp
-Under
-.Fx
-and Linux, this feature is unused, but registered for compatibility.
-New pools created on these systems will have the feature
-.Sy enabled
-but will never transition to
-.Sy active ,
-as this functionality is not required for crash dump support.
-Existing pools where this feature is
-.Sy active
-can be imported.
-.
-.feature com.delphix obsolete_counts yes device_removal
-This feature is an enhancement of
-.Sy device_removal ,
-which will over time reduce the memory used to track removed devices.
-When indirect blocks are freed or remapped,
-we note that their part of the indirect mapping is "obsolete" – no longer needed.
-.Pp
-This feature becomes
-.Sy active
-when the
-.Nm zpool Cm remove
-command is used on a top-level vdev, and will never return to being
-.Sy enabled .
-.
-.feature org.zfsonlinux project_quota yes extensible_dataset
-This feature allows administrators to account space and object usage
-information against the project identifier (ID).
-.Pp
-The project ID is an object-based attribute.
-When upgrading an existing filesystem,
-objects without a project ID will be assigned a zero project ID.
-When this feature is enabled, newly created objects inherit
-their parent directories' project ID if the parent's inherit flag is set
-.Pq via Nm chattr Sy [+-]P No or Nm zfs Cm project Fl s Ns | Ns Fl C .
-Otherwise, the new object's project ID will be zero.
-An object's project ID can be changed at any time by the owner
-(or privileged user) via
-.Nm chattr Fl p Ar prjid
-or
-.Nm zfs Cm project Fl p Ar prjid .
-.Pp
-This feature will become
-.Sy active
-as soon as it is enabled and will never return to being
-.Sy disabled .
-\*[remount-upgrade]
-.
-.feature com.delphix redaction_bookmarks no bookmarks extensible_dataset
-This feature enables the use of redacted
-.Nm zfs Cm send Ns s ,
-which create redaction bookmarks storing the list of blocks
-redacted by the send that created them.
-For more information about redacted sends, see
-.Xr zfs-send 8 .
-.
-.feature com.delphix redacted_datasets no extensible_dataset
-This feature enables the receiving of redacted
-.Nm zfs Cm send
-streams, which create redacted datasets when received.
-These datasets are missing some of their blocks,
-and so cannot be safely mounted, and their contents cannot be safely read.
-For more information about redacted receives, see
-.Xr zfs-send 8 .
-.
-.feature com.datto resilver_defer yes
-This feature allows ZFS to postpone new resilvers if an existing one is already
-in progress.
-Without this feature, any new resilvers will cause the currently
-running one to be immediately restarted from the beginning.
-.Pp
-This feature becomes
-.Sy active
-once a resilver has been deferred, and returns to being
-.Sy enabled
-when the deferred resilver begins.
-.
-.feature org.illumos sha512 no extensible_dataset
-This feature enables the use of the SHA-512/256 truncated hash algorithm
-(FIPS 180-4) for checksum and dedup.
-The native 64-bit arithmetic of SHA-512 provides an approximate 50%
-performance boost over SHA-256 on 64-bit hardware
-and is thus a good minimum-change replacement candidate
-for systems where hash performance is important,
-but these systems cannot, for whatever reason, utilize the faster
-.Sy skein No and Sy edonr
-algorithms.
-.Pp
-.checksum-spiel sha512
-.
-.feature org.illumos skein no extensible_dataset
-This feature enables the use of the Skein hash algorithm for checksum and dedup.
-Skein is a high-performance secure hash algorithm that was a
-finalist in the NIST SHA-3 competition.
-It provides a very high security margin and high performance on 64-bit hardware
-(80% faster than SHA-256).
-This implementation also utilizes the new salted checksumming
-functionality in ZFS, which means that the checksum is pre-seeded with a
-secret 256-bit random key (stored on the pool) before being fed the data
-block to be checksummed.
-Thus the produced checksums are unique to a given pool,
-preventing hash collision attacks on systems with dedup.
-.Pp
-.checksum-spiel skein
-.
-.feature com.delphix spacemap_histogram yes
-This feature allows ZFS to maintain more information about how free space
-is organized within the pool.
-If this feature is
-.Sy enabled ,
-it will be activated when a new space map object is created, or
-an existing space map is upgraded to the new format,
-and never returns to being
-.Sy enabled .
-.
-.feature com.delphix spacemap_v2 yes
-This feature enables the use of the new space map encoding which
-consists of two words (instead of one) whenever it is advantageous.
-The new encoding allows space maps to represent large regions of
-space more efficiently on-disk while also increasing their maximum
-addressable offset.
-.Pp
-This feature becomes
-.Sy active
-once it is
-.Sy enabled ,
-and never returns to being
-.Sy enabled .
-.
-.feature org.zfsonlinux userobj_accounting yes extensible_dataset
-This feature allows administrators to account object usage information
-by user and group.
-.Pp
-\*[instant-never]
-\*[remount-upgrade]
-.
-.feature com.delphix zpool_checkpoint yes
-This feature enables the
-.Nm zpool Cm checkpoint
-command that can checkpoint the state of the pool
-at the time it was issued and later rewind to it or discard it.
-.Pp
-This feature becomes
-.Sy active
-when the
-.Nm zpool Cm checkpoint
-command is used to checkpoint the pool.
-The feature will only return to being
-.Sy enabled
-when the pool is rewound or the checkpoint has been discarded.
-.
-.feature org.freebsd zstd_compress no extensible_dataset
-.Sy zstd
-is a high-performance compression algorithm that features a
-combination of high compression ratios and high speed.
-Compared to
-.Sy gzip ,
-.Sy zstd
-offers slightly better compression at much higher speeds.
-Compared to
-.Sy lz4 ,
-.Sy zstd
-offers much better compression while being only modestly slower.
-Typically,
-.Sy zstd
-compression speed ranges from 250 to 500 MB/s per thread
-and decompression speed is over 1 GB/s per thread.
-.Pp
-When the
-.Sy zstd
-feature is set to
-.Sy enabled ,
-the administrator can turn on
-.Sy zstd
-compression of any dataset using
-.Nm zfs Cm set Sy compress Ns = Ns Sy zstd Ar dset
-.Po see Xr zfs-set 8 Pc .
-This feature becomes
-.Sy active
-once a
-.Sy compress
-property has been set to
-.Sy zstd ,
-and will return to being
-.Sy enabled
-once all filesystems that have ever had their
-.Sy compress
-property set to
-.Sy zstd
-are destroyed.
-.El
-.
-.Sh SEE ALSO
-.Xr zpool 8
diff --git a/man/man7/zfsconcepts.7 b/man/man7/zfsconcepts.7
new file mode 100644
index 000000000000..f958035f72df
--- /dev/null
+++ b/man/man7/zfsconcepts.7
@@ -0,0 +1,206 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFSCONCEPTS 7
+.Os
+.
+.Sh NAME
+.Nm zfsconcepts
+.Nd overview of ZFS concepts
+.
+.Sh DESCRIPTION
+.Ss ZFS File System Hierarchy
+A ZFS storage pool is a logical collection of devices that provide space for
+datasets.
+A storage pool is also the root of the ZFS file system hierarchy.
+.Pp
+The root of the pool can be accessed as a file system and supports operations
+such as mounting and unmounting, taking snapshots, and setting properties.
+The physical storage characteristics, however, are managed by the
+.Xr zpool 8
+command.
+.Pp
+See
+.Xr zpool 8
+for more information on creating and administering pools.
+.Ss Snapshots
+A snapshot is a read-only copy of a file system or volume.
+Snapshots can be created extremely quickly, and initially consume no additional
+space within the pool.
+As data within the active dataset changes, the snapshot consumes more disk
+space by continuing to reference the old data that would otherwise be shared
+with the active dataset.
+.Pp
+Snapshots can have arbitrary names.
+Snapshots of volumes can be cloned or rolled back; their visibility is determined
+by the
+.Sy snapdev
+property of the parent volume.
+.Pp
+File system snapshots can be accessed under the
+.Pa .zfs/snapshot
+directory in the root of the file system.
+Snapshots are automatically mounted on demand and may be unmounted at regular
+intervals.
+The visibility of the
+.Pa .zfs
+directory can be controlled by the
+.Sy snapdir
+property.
+.Ss Bookmarks
+A bookmark is like a snapshot, a read-only copy of a file system or volume.
+Bookmarks can be created extremely quickly, compared to snapshots, and they
+consume no additional space within the pool.
+Bookmarks can also have arbitrary names, much like snapshots.
+.Pp
+Unlike snapshots, bookmarks cannot be accessed through the filesystem in any way.
+From a storage standpoint, a bookmark just provides a way to reference
+when a snapshot was created as a distinct object.
+Bookmarks are initially tied to a snapshot, not the filesystem or volume,
+and they will survive if the snapshot itself is destroyed.
+Since they are very lightweight, there's little incentive to destroy them.
+.Ss Clones
+A clone is a writable volume or file system whose initial contents are the same
+as another dataset.
+As with snapshots, creating a clone is nearly instantaneous, and initially
+consumes no additional space.
+.Pp
+Clones can only be created from a snapshot.
+When a snapshot is cloned, it creates an implicit dependency between the parent
+and child.
+Even though the clone is created somewhere else in the dataset hierarchy, the
+original snapshot cannot be destroyed as long as a clone exists.
+The
+.Sy origin
+property exposes this dependency, and the
+.Cm destroy
+command lists any such dependencies, if they exist.
+.Pp
+The clone parent-child dependency relationship can be reversed by using the
+.Cm promote
+subcommand.
+This causes the
+.Qq origin
+file system to become a clone of the specified file system, which makes it
+possible to destroy the file system that the clone was created from.
+.Ss "Mount Points"
+Creating a ZFS file system is a simple operation, so the number of file systems
+per system is likely to be large.
+To cope with this, ZFS automatically manages mounting and unmounting file
+systems without the need to edit the
+.Pa /etc/fstab
+file.
+All automatically managed file systems are mounted by ZFS at boot time.
+.Pp
+By default, file systems are mounted under
+.Pa /path ,
+where
+.Ar path
+is the name of the file system in the ZFS namespace.
+Directories are created and destroyed as needed.
+.Pp
+A file system can also have a mount point set in the
+.Sy mountpoint
+property.
+This directory is created as needed, and ZFS automatically mounts the file
+system when the
+.Nm zfs Cm mount Fl a
+command is invoked
+.Po without editing
+.Pa /etc/fstab
+.Pc .
+The
+.Sy mountpoint
+property can be inherited, so if
+.Em pool/home
+has a mount point of
+.Pa /export/stuff ,
+then
+.Em pool/home/user
+automatically inherits a mount point of
+.Pa /export/stuff/user .
+.Pp
+A file system
+.Sy mountpoint
+property of
+.Sy none
+prevents the file system from being mounted.
+.Pp
+If needed, ZFS file systems can also be managed with traditional tools
+.Po
+.Nm mount ,
+.Nm umount ,
+.Pa /etc/fstab
+.Pc .
+If a file system's mount point is set to
+.Sy legacy ,
+ZFS makes no attempt to manage the file system, and the administrator is
+responsible for mounting and unmounting the file system.
+Because pools must
+be imported before a legacy mount can succeed, administrators should ensure
+that legacy mounts are only attempted after the zpool import process
+finishes at boot time.
+For example, on machines using systemd, the mount option
+.Pp
+.Nm x-systemd.requires=zfs-import.target
+.Pp
+will ensure that zfs-import.target is reached before systemd attempts mounting
+the filesystem.
+See
+.Xr systemd.mount 5
+for details.
+.Ss Deduplication
+Deduplication is the process of removing redundant data at the block level,
+reducing the total amount of data stored.
+If a file system has the
+.Sy dedup
+property enabled, duplicate data blocks are removed synchronously.
+The result
+is that only unique data is stored and common components are shared among files.
+.Pp
+Deduplicating data is a very resource-intensive operation.
+It is generally recommended that you have at least 1.25 GiB of RAM
+per 1 TiB of storage when you enable deduplication.
+Calculating the exact requirement depends heavily
+on the type of data stored in the pool.
+.Pp
+Enabling deduplication on an improperly-designed system can result in
+performance issues (slow I/O and administrative operations).
+It can potentially lead to problems importing a pool due to memory exhaustion.
+Deduplication can consume significant processing power (CPU) and memory as well
+as generate additional disk I/O.
+.Pp
+Before creating a pool with deduplication enabled, ensure that you have planned
+your hardware requirements appropriately and implemented appropriate recovery
+practices, such as regular backups.
+Consider using the
+.Sy compression
+property as a less resource-intensive alternative.
diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7
new file mode 100644
index 000000000000..3f3ddcebf320
--- /dev/null
+++ b/man/man7/zfsprops.7
@@ -0,0 +1,2067 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2011, Pawel Jakub Dawidek
+.\" Copyright (c) 2012, Glen Barber
+.\" Copyright (c) 2012, Bryan Drewery
+.\" Copyright (c) 2013, Steven Hartland
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
+.\" Copyright (c) 2014, Xin LI
+.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
+.\"
+.Dd May 24, 2021
+.Dt ZFSPROPS 7
+.Os
+.
+.Sh NAME
+.Nm zfsprops
+.Nd native and user-defined properties of ZFS datasets
+.
+.Sh DESCRIPTION +Properties are divided into two types, native properties and user-defined +.Po or +.Qq user +.Pc +properties. +Native properties either export internal statistics or control ZFS behavior. +In addition, native properties are either editable or read-only. +User properties have no effect on ZFS behavior, but you can use them to annotate +datasets in a way that is meaningful in your environment. +For more information about user properties, see the +.Sx User Properties +section, below. +. +.Ss Native Properties +Every dataset has a set of properties that export statistics about the dataset +as well as control various behaviors. +Properties are inherited from the parent unless overridden by the child. +Some properties apply only to certain types of datasets +.Pq file systems, volumes, or snapshots . +.Pp +The values of numeric properties can be specified using human-readable suffixes +.Po for example, +.Sy k , +.Sy KB , +.Sy M , +.Sy Gb , +and so forth, up to +.Sy Z +for zettabyte +.Pc . +The following are all valid +.Pq and equal +specifications: +.Li 1536M, 1.5g, 1.50GB . +.Pp +The values of non-numeric properties are case sensitive and must be lowercase, +except for +.Sy mountpoint , +.Sy sharenfs , +and +.Sy sharesmb . +.Pp +The following native properties consist of read-only statistics about the +dataset. +These properties can be neither set, nor inherited. +Native properties apply to all dataset types unless otherwise noted. +.Bl -tag -width "usedbyrefreservation" +.It Sy available +The amount of space available to the dataset and all its children, assuming that +there is no other activity in the pool. +Because space is shared within a pool, availability can be limited by any number +of factors, including physical pool size, quotas, reservations, or other +datasets within the pool. +.Pp +This property can also be referred to by its shortened column name, +.Sy avail . +.It Sy compressratio +For non-snapshots, the compression ratio achieved for the +.Sy used +space of this dataset, expressed as a multiplier. +The +.Sy used +property includes descendant datasets, and, for clones, does not include the +space shared with the origin snapshot. +For snapshots, the +.Sy compressratio +is the same as the +.Sy refcompressratio +property. +Compression can be turned on by running: +.Nm zfs Cm set Sy compression Ns = Ns Sy on Ar dataset . +The default value is +.Sy off . +.It Sy createtxg +The transaction group (txg) in which the dataset was created. +Bookmarks have the same +.Sy createtxg +as the snapshot they are initially tied to. +This property is suitable for ordering a list of snapshots, +e.g. for incremental send and receive. +.It Sy creation +The time this dataset was created. +.It Sy clones +For snapshots, this property is a comma-separated list of filesystems or volumes +which are clones of this snapshot. +The clones' +.Sy origin +property is this snapshot. +If the +.Sy clones +property is not empty, then this snapshot can not be destroyed +.Po even with the +.Fl r +or +.Fl f +options +.Pc . +The roles of origin and clone can be swapped by promoting the clone with the +.Nm zfs Cm promote +command. +.It Sy defer_destroy +This property is +.Sy on +if the snapshot has been marked for deferred destroy by using the +.Nm zfs Cm destroy Fl d +command. +Otherwise, the property is +.Sy off . +.It Sy encryptionroot +For encrypted datasets, indicates where the dataset is currently inheriting its +encryption key from. 
+Loading or unloading a key for the +.Sy encryptionroot +will implicitly load / unload the key for any inheriting datasets (see +.Nm zfs Cm load-key +and +.Nm zfs Cm unload-key +for details). +Clones will always share an +encryption key with their origin. +See the +.Sx Encryption +section of +.Xr zfs-load-key 8 +for details. +.It Sy filesystem_count +The total number of filesystems and volumes that exist under this location in +the dataset tree. +This value is only available when a +.Sy filesystem_limit +has been set somewhere in the tree under which the dataset resides. +.It Sy keystatus +Indicates if an encryption key is currently loaded into ZFS. +The possible values are +.Sy none , +.Sy available , +and +.Sy unavailable . +See +.Nm zfs Cm load-key +and +.Nm zfs Cm unload-key . +.It Sy guid +The 64 bit GUID of this dataset or bookmark which does not change over its +entire lifetime. +When a snapshot is sent to another pool, the received snapshot has the same GUID. +Thus, the +.Sy guid +is suitable to identify a snapshot across pools. +.It Sy logicalreferenced +The amount of space that is +.Qq logically +accessible by this dataset. +See the +.Sy referenced +property. +The logical space ignores the effect of the +.Sy compression +and +.Sy copies +properties, giving a quantity closer to the amount of data that applications +see. +However, it does include space consumed by metadata. +.Pp +This property can also be referred to by its shortened column name, +.Sy lrefer . +.It Sy logicalused +The amount of space that is +.Qq logically +consumed by this dataset and all its descendents. +See the +.Sy used +property. +The logical space ignores the effect of the +.Sy compression +and +.Sy copies +properties, giving a quantity closer to the amount of data that applications +see. +However, it does include space consumed by metadata. +.Pp +This property can also be referred to by its shortened column name, +.Sy lused . +.It Sy mounted +For file systems, indicates whether the file system is currently mounted. +This property can be either +.Sy yes +or +.Sy no . +.It Sy objsetid +A unique identifier for this dataset within the pool. +Unlike the dataset's +.Sy guid , No the Sy objsetid +of a dataset is not transferred to other pools when the snapshot is copied +with a send/receive operation. +The +.Sy objsetid +can be reused (for a new dataset) after the dataset is deleted. +.It Sy origin +For cloned file systems or volumes, the snapshot from which the clone was +created. +See also the +.Sy clones +property. +.It Sy receive_resume_token +For filesystems or volumes which have saved partially-completed state from +.Nm zfs Cm receive Fl s , +this opaque token can be provided to +.Nm zfs Cm send Fl t +to resume and complete the +.Nm zfs Cm receive . +.It Sy redact_snaps +For bookmarks, this is the list of snapshot guids the bookmark contains a redaction +list for. +For snapshots, this is the list of snapshot guids the snapshot is redacted with +respect to. +.It Sy referenced +The amount of data that is accessible by this dataset, which may or may not be +shared with other datasets in the pool. +When a snapshot or clone is created, it initially references the same amount of +space as the file system or snapshot it was created from, since its contents are +identical. +.Pp +This property can also be referred to by its shortened column name, +.Sy refer . +.It Sy refcompressratio +The compression ratio achieved for the +.Sy referenced +space of this dataset, expressed as a multiplier. 
+See also the +.Sy compressratio +property. +.It Sy snapshot_count +The total number of snapshots that exist under this location in the dataset +tree. +This value is only available when a +.Sy snapshot_limit +has been set somewhere in the tree under which the dataset resides. +.It Sy type +The type of dataset: +.Sy filesystem , +.Sy volume , +.Sy snapshot , +or +.Sy bookmark . +.It Sy used +The amount of space consumed by this dataset and all its descendents. +This is the value that is checked against this dataset's quota and reservation. +The space used does not include this dataset's reservation, but does take into +account the reservations of any descendent datasets. +The amount of space that a dataset consumes from its parent, as well as the +amount of space that is freed if this dataset is recursively destroyed, is the +greater of its space used and its reservation. +.Pp +The used space of a snapshot +.Po see the +.Sx Snapshots +section of +.Xr zfsconcepts 7 +.Pc +is space that is referenced exclusively by this snapshot. +If this snapshot is destroyed, the amount of +.Sy used +space will be freed. +Space that is shared by multiple snapshots isn't accounted for in this metric. +When a snapshot is destroyed, space that was previously shared with this +snapshot can become unique to snapshots adjacent to it, thus changing the used +space of those snapshots. +The used space of the latest snapshot can also be affected by changes in the +file system. +Note that the +.Sy used +space of a snapshot is a subset of the +.Sy written +space of the snapshot. +.Pp +The amount of space used, available, or referenced does not take into account +pending changes. +Pending changes are generally accounted for within a few seconds. +Committing a change to a disk using +.Xr fsync 2 +or +.Sy O_SYNC +does not necessarily guarantee that the space usage information is updated +immediately. +.It Sy usedby* +The +.Sy usedby* +properties decompose the +.Sy used +properties into the various reasons that space is used. +Specifically, +.Sy used No = +.Sy usedbychildren No + +.Sy usedbydataset No + +.Sy usedbyrefreservation No + +.Sy usedbysnapshots . +These properties are only available for datasets created on +.Nm zpool +.Qo version 13 Qc +pools. +.It Sy usedbychildren +The amount of space used by children of this dataset, which would be freed if +all the dataset's children were destroyed. +.It Sy usedbydataset +The amount of space used by this dataset itself, which would be freed if the +dataset were destroyed +.Po after first removing any +.Sy refreservation +and destroying any necessary snapshots or descendents +.Pc . +.It Sy usedbyrefreservation +The amount of space used by a +.Sy refreservation +set on this dataset, which would be freed if the +.Sy refreservation +was removed. +.It Sy usedbysnapshots +The amount of space consumed by snapshots of this dataset. +In particular, it is the amount of space that would be freed if all of this +dataset's snapshots were destroyed. +Note that this is not simply the sum of the snapshots' +.Sy used +properties because space can be shared by multiple snapshots. +.It Sy userused Ns @ Ns Ar user +The amount of space consumed by the specified user in this dataset. +Space is charged to the owner of each file, as displayed by +.Nm ls Fl l . +The amount of space charged is displayed by +.Nm du No and Nm ls Fl s . +See the +.Nm zfs Cm userspace +command for more information. +.Pp +Unprivileged users can access only their own space usage. 
+The root user, or a user who has been granted the
+.Sy userused
+privilege with
+.Nm zfs Cm allow ,
+can access everyone's usage.
+.Pp
+The
+.Sy userused Ns @ Ns Ar ...
+properties are not displayed by
+.Nm zfs Cm get Sy all .
+The user's name must be appended after the
+.Sy @
+symbol, using one of the following forms:
+.Bl -bullet -compact -offset 4n
+.It
+POSIX name
+.Pq Qq joe
+.It
+POSIX numeric ID
+.Pq Qq 789
+.It
+SID name
+.Pq Qq joe.smith@mydomain
+.It
+SID numeric ID
+.Pq Qq S-1-123-456-789
+.El
+.Pp
+Files created on Linux always have POSIX owners.
+.It Sy userobjused Ns @ Ns Ar user
+The
+.Sy userobjused
+property is similar to
+.Sy userused
+but instead it counts the number of objects consumed by a user.
+This property counts all objects allocated on behalf of the user;
+it may differ from the results of system tools such as
+.Nm df Fl i .
+.Pp
+When the property
+.Sy xattr Ns = Ns Sy on
+is set on a file system, additional objects will be created per-file to store
+extended attributes.
+These additional objects are reflected in the
+.Sy userobjused
+value and are counted against the user's
+.Sy userobjquota .
+When a file system is configured to use
+.Sy xattr Ns = Ns Sy sa ,
+no additional internal objects are normally required.
+.It Sy userrefs
+This property is set to the number of user holds on this snapshot.
+User holds are set by using the
+.Nm zfs Cm hold
+command.
+.It Sy groupused Ns @ Ns Ar group
+The amount of space consumed by the specified group in this dataset.
+Space is charged to the group of each file, as displayed by
+.Nm ls Fl l .
+See the
+.Sy userused Ns @ Ns Ar user
+property for more information.
+.Pp
+Unprivileged users can only access their own groups' space usage.
+The root user, or a user who has been granted the
+.Sy groupused
+privilege with
+.Nm zfs Cm allow ,
+can access all groups' usage.
+.It Sy groupobjused Ns @ Ns Ar group
+The number of objects consumed by the specified group in this dataset.
+Multiple objects may be charged to the group for each file when extended
+attributes are in use.
+See the
+.Sy userobjused Ns @ Ns Ar user
+property for more information.
+.Pp
+Unprivileged users can only access their own groups' space usage.
+The root user, or a user who has been granted the
+.Sy groupobjused
+privilege with
+.Nm zfs Cm allow ,
+can access all groups' usage.
+.It Sy projectused Ns @ Ns Ar project
+The amount of space consumed by the specified project in this dataset.
+A project is identified via the project identifier (ID), which is a numeric
+attribute of an object.
+When created, an object can inherit the project ID from its parent object
+if the parent has the project ID inheritance flag set
+.Po the flag can be set and changed via
+.Nm chattr Fl /+P
+or
+.Nm zfs project Fl s
+.Pc .
+A privileged user can set and change an object's project ID via
+.Nm chattr Fl p
+or
+.Nm zfs project Fl s
+at any time.
+Space is charged to the project of each file, as displayed by
+.Nm lsattr Fl p
+or
+.Nm zfs project .
+See the
+.Sy userused Ns @ Ns Ar user
+property for more information.
+.Pp
+The root user, or a user who has been granted the
+.Sy projectused
+privilege with
+.Nm zfs Cm allow ,
+can access all projects' usage.
+.It Sy projectobjused Ns @ Ns Ar project
+The
+.Sy projectobjused
+property is similar to
+.Sy projectused
+but instead it counts the number of objects consumed by the project.
+When the property
+.Sy xattr Ns = Ns Sy on
+is set on a file system, ZFS will create additional objects per-file to store
+extended attributes.
+These additional objects are reflected in the +.Sy projectobjused +value and are counted against the project's +.Sy projectobjquota . +When a filesystem is configured to use +.Sy xattr Ns = Ns Sy sa +no additional internal objects are required. +See the +.Sy userobjused Ns @ Ns Ar user +property for more information. +.Pp +The root user, or a user who has been granted the +.Sy projectobjused +privilege with +.Nm zfs allow , +can access all projects' objects usage. +.It Sy volblocksize +For volumes, specifies the block size of the volume. +The +.Sy blocksize +cannot be changed once the volume has been written, so it should be set at +volume creation time. +The default +.Sy blocksize +for volumes is 8 Kbytes. +Any power of 2 from 512 bytes to 128 Kbytes is valid. +.Pp +This property can also be referred to by its shortened column name, +.Sy volblock . +.It Sy written +The amount of space +.Sy referenced +by this dataset, that was written since the previous snapshot +.Pq i.e. that is not referenced by the previous snapshot . +.It Sy written Ns @ Ns Ar snapshot +The amount of +.Sy referenced +space written to this dataset since the specified snapshot. +This is the space that is referenced by this dataset but was not referenced by +the specified snapshot. +.Pp +The +.Ar snapshot +may be specified as a short snapshot name +.Pq just the part after the Sy @ , +in which case it will be interpreted as a snapshot in the same filesystem as +this dataset. +The +.Ar snapshot +may be a full snapshot name +.Pq Ar filesystem Ns @ Ns Ar snapshot , +which for clones may be a snapshot in the origin's filesystem +.Pq or the origin of the origin's filesystem, etc. +.El +.Pp +The following native properties can be used to change the behavior of a ZFS +dataset. +.Bl -tag -width "" +.It Xo +.Sy aclinherit Ns = Ns Sy discard Ns | Ns Sy noallow Ns | Ns +.Sy restricted Ns | Ns Sy passthrough Ns | Ns Sy passthrough-x +.Xc +Controls how ACEs are inherited when files and directories are created. +.Bl -tag -compact -offset 4n -width "passthrough-x" +.It Sy discard +does not inherit any ACEs. +.It Sy noallow +only inherits inheritable ACEs that specify +.Qq deny +permissions. +.It Sy restricted +default, removes the +.Sy write_acl +and +.Sy write_owner +permissions when the ACE is inherited. +.It Sy passthrough +inherits all inheritable ACEs without any modifications. +.It Sy passthrough-x +same meaning as +.Sy passthrough , +except that the +.Sy owner@ , group@ , No and Sy everyone@ +ACEs inherit the execute permission only if the file creation mode also requests +the execute bit. +.El +.Pp +When the property value is set to +.Sy passthrough , +files are created with a mode determined by the inheritable ACEs. +If no inheritable ACEs exist that affect the mode, then the mode is set in +accordance to the requested mode from the application. +.Pp +The +.Sy aclinherit +property does not apply to POSIX ACLs. +.It Xo +.Sy aclmode Ns = Ns Sy discard Ns | Ns Sy groupmask Ns | Ns +.Sy passthrough Ns | Ns Sy restricted Ns +.Xc +Controls how an ACL is modified during chmod(2) and how inherited ACEs +are modified by the file creation mode: +.Bl -tag -compact -offset 4n -width "passthrough" +.It Sy discard +default, deletes all +.Sy ACEs +except for those representing +the mode of the file or directory requested by +.Xr chmod 2 . +.It Sy groupmask +reduces permissions granted in all +.Sy ALLOW +entries found in the +.Sy ACL +such that they are no greater than the group permissions specified by +.Xr chmod 2 . 
+.It Sy passthrough
+indicates that no changes are made to the ACL other than creating or updating
+the necessary ACL entries to represent the new mode of the file or directory.
+.It Sy restricted
+will cause the
+.Xr chmod 2
+operation to return an error when used on any file or directory which has
+a non-trivial ACL whose entries cannot be represented by a mode.
+.Xr chmod 2
+is required to change the set user ID, set group ID, or sticky bits on a file
+or directory, as they do not have equivalent ACL entries.
+In order to use
+.Xr chmod 2
+on a file or directory with a non-trivial ACL when
+.Sy aclmode
+is set to
+.Sy restricted ,
+you must first remove all ACL entries which do not represent the current mode.
+.El
+.It Sy acltype Ns = Ns Sy off Ns | Ns Sy nfsv4 Ns | Ns Sy posix
+Controls whether ACLs are enabled and, if so, what type of ACL to use.
+When this property is set to a type of ACL not supported by the current
+platform, the behavior is the same as if it were set to
+.Sy off .
+.Bl -tag -compact -offset 4n -width "posixacl"
+.It Sy off
+default on Linux; when a file system has the
+.Sy acltype
+property set to
+.Sy off ,
+ACLs are disabled.
+.It Sy noacl
+an alias for
+.Sy off
+.It Sy nfsv4
+default on
+.Fx ,
+indicates that NFSv4-style ZFS ACLs should be used.
+These ACLs can be managed with the
+.Xr getfacl 1
+and
+.Xr setfacl 1
+commands.
+The
+.Sy nfsv4
+ZFS ACL type is not yet supported on Linux.
+.It Sy posix
+indicates POSIX ACLs should be used.
+POSIX ACLs are specific to Linux and are not functional on other platforms.
+POSIX ACLs are stored as an extended
+attribute and therefore will not overwrite any existing NFSv4 ACLs which
+may be set.
+.It Sy posixacl
+an alias for
+.Sy posix
+.El
+.Pp
+To obtain the best performance when setting
+.Sy posix ,
+users are strongly encouraged to set the
+.Sy xattr Ns = Ns Sy sa
+property.
+This will result in the POSIX ACL being stored more efficiently on disk.
+As a consequence, however, all new extended attributes will only be
+accessible from OpenZFS implementations which support the
+.Sy xattr Ns = Ns Sy sa
+property.
+See the
+.Sy xattr
+property for more details.
+.It Sy atime Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the access time for files is updated when they are read.
+Turning this property off avoids producing write traffic when reading files and
+can result in significant performance gains, though it might confuse mailers
+and other similar utilities.
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy atime
+and
+.Sy noatime
+mount options.
+The default value is
+.Sy on .
+See also
+.Sy relatime
+below.
+.It Sy canmount Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy noauto
+If this property is set to
+.Sy off ,
+the file system cannot be mounted, and is ignored by
+.Nm zfs Cm mount Fl a .
+Setting this property to
+.Sy off
+is similar to setting the
+.Sy mountpoint
+property to
+.Sy none ,
+except that the dataset still has a normal
+.Sy mountpoint
+property, which can be inherited.
+Setting this property to
+.Sy off
+allows datasets to be used solely as a mechanism to inherit properties.
+One example of setting
+.Sy canmount Ns = Ns Sy off
+is to have two datasets with the same
+.Sy mountpoint ,
+so that the children of both datasets appear in the same directory, but might
+have different inherited characteristics.
+.Pp
+When set to
+.Sy noauto ,
+a dataset can only be mounted and unmounted explicitly.
+The dataset is not mounted automatically when the dataset is created or +imported, nor is it mounted by the +.Nm zfs Cm mount Fl a +command or unmounted by the +.Nm zfs Cm unmount Fl a +command. +.Pp +This property is not inherited. +.It Xo +.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns +.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns +.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr +.Xc +Controls the checksum used to verify data integrity. +The default value is +.Sy on , +which automatically selects an appropriate algorithm +.Po currently, +.Sy fletcher4 , +but this may change in future releases +.Pc . +The value +.Sy off +disables integrity checking on user data. +The value +.Sy noparity +not only disables integrity but also disables maintaining parity for user data. +This setting is used internally by a dump device residing on a RAID-Z pool and +should not be used by any other dataset. +Disabling checksums is +.Em NOT +a recommended practice. +.Pp +The +.Sy sha512 , +.Sy skein , +and +.Sy edonr +checksum algorithms require enabling the appropriate features on the pool. +.Fx +does not support the +.Sy edonr +algorithm. +.Pp +Please see +.Xr zpool-features 7 +for more information on these algorithms. +.Pp +Changing this property affects only newly-written data. +.It Xo +.Sy compression Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy gzip Ns | Ns +.Sy gzip- Ns Ar N Ns | Ns Sy lz4 Ns | Ns Sy lzjb Ns | Ns Sy zle Ns | Ns Sy zstd Ns | Ns +.Sy zstd- Ns Ar N Ns | Ns Sy zstd-fast Ns | Ns Sy zstd-fast- Ns Ar N +.Xc +Controls the compression algorithm used for this dataset. +.Pp +Setting compression to +.Sy on +indicates that the current default compression algorithm should be used. +The default balances compression and decompression speed, with compression ratio +and is expected to work well on a wide variety of workloads. +Unlike all other settings for this property, +.Sy on +does not select a fixed compression type. +As new compression algorithms are added to ZFS and enabled on a pool, the +default compression algorithm may change. +The current default compression algorithm is either +.Sy lzjb +or, if the +.Sy lz4_compress +feature is enabled, +.Sy lz4 . +.Pp +The +.Sy lz4 +compression algorithm is a high-performance replacement for the +.Sy lzjb +algorithm. +It features significantly faster compression and decompression, as well as a +moderately higher compression ratio than +.Sy lzjb , +but can only be used on pools with the +.Sy lz4_compress +feature set to +.Sy enabled . +See +.Xr zpool-features 7 +for details on ZFS feature flags and the +.Sy lz4_compress +feature. +.Pp +The +.Sy lzjb +compression algorithm is optimized for performance while providing decent data +compression. +.Pp +The +.Sy gzip +compression algorithm uses the same compression as the +.Xr gzip 1 +command. +You can specify the +.Sy gzip +level by using the value +.Sy gzip- Ns Ar N , +where +.Ar N +is an integer from 1 +.Pq fastest +to 9 +.Pq best compression ratio . +Currently, +.Sy gzip +is equivalent to +.Sy gzip-6 +.Po which is also the default for +.Xr gzip 1 +.Pc . +.Pp +The +.Sy zstd +compression algorithm provides both high compression ratios and good performance. +You can specify the +.Sy zstd +level by using the value +.Sy zstd- Ns Ar N , +where +.Ar N +is an integer from 1 +.Pq fastest +to 19 +.Pq best compression ratio . +.Sy zstd +is equivalent to +.Sy zstd-3 . +.Pp +Faster speeds at the cost of the compression ratio can be requested by +setting a negative +.Sy zstd +level. 
+This is done using
+.Sy zstd-fast- Ns Ar N ,
+where
+.Ar N
+is an integer in [1-9,10,20,30,...,100,500,1000], which maps to a negative
+.Sy zstd
+level.
+The lower the level, the faster the compression;
+.Ar 1000 No provides the fastest compression and the lowest compression ratio.
+.Sy zstd-fast
+is equivalent to
+.Sy zstd-fast-1 .
+.Pp
+The
+.Sy zle
+compression algorithm compresses runs of zeros.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy compress .
+Changing this property affects only newly-written data.
+.Pp
+When any setting except
+.Sy off
+is selected, compression will explicitly check for blocks consisting of only
+zeroes (the NUL byte).
+When a zero-filled block is detected, it is stored as
+a hole and not compressed using the indicated compression algorithm.
+.Pp
+Any block being compressed must be no larger than 7/8 of its original size
+after compression; otherwise, the compression is not considered worthwhile
+and the block is saved uncompressed.
+Note that when the logical block is less than
+8 times the disk sector size, this effectively reduces the necessary compression
+ratio; for example, 8kB blocks on disks with 4kB disk sectors must compress to 1/2
+or less of their original size.
+.It Xo
+.Sy context Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux context for all files in the file system under
+a mount point for that file system.
+See
+.Xr selinux 8
+for more information.
+.It Xo
+.Sy fscontext Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux context for the file system being mounted.
+See
+.Xr selinux 8
+for more information.
+.It Xo
+.Sy defcontext Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux default context for unlabeled files.
+See
+.Xr selinux 8
+for more information.
+.It Xo
+.Sy rootcontext Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux context for the root inode of the file system.
+See
+.Xr selinux 8
+for more information.
+.It Sy copies Ns = Ns Sy 1 Ns | Ns Sy 2 Ns | Ns Sy 3
+Controls the number of copies of data stored for this dataset.
+These copies are in addition to any redundancy provided by the pool, for
+example, mirroring or RAID-Z.
+The copies are stored on different disks, if possible.
+The space used by multiple copies is charged to the associated file and dataset,
+changing the
+.Sy used
+property and counting against quotas and reservations.
+.Pp
+Changing this property only affects newly-written data.
+Therefore, set this property at file system creation time by using the
+.Fl o Sy copies Ns = Ns Ar N
+option.
+.Pp
+Remember that ZFS will not import a pool with a missing top-level vdev.
+Do
+.Em NOT
+create, for example, a two-disk striped pool and set
+.Sy copies Ns = Ns Ar 2
+on some datasets thinking you have set up redundancy for them.
+When a disk fails, you will not be able to import the pool
+and will have lost all of your data.
+.Pp
+Encrypted datasets may not have
+.Sy copies Ns = Ns Ar 3
+since the implementation stores some encryption metadata where the third copy
+would normally be.
+.It Sy devices Ns = Ns Sy on Ns | Ns Sy off
+Controls whether device nodes can be opened on this file system.
+The default value is +.Sy on . +The values +.Sy on +and +.Sy off +are equivalent to the +.Sy dev +and +.Sy nodev +mount options. +.It Xo +.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns +.Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns +.Sy edonr , Ns Sy verify +.Xc +Configures deduplication for a dataset. +The default value is +.Sy off . +The default deduplication checksum is +.Sy sha256 +(this may change in the future). +When +.Sy dedup +is enabled, the checksum defined here overrides the +.Sy checksum +property. +Setting the value to +.Sy verify +has the same effect as the setting +.Sy sha256 , Ns Sy verify . +.Pp +If set to +.Sy verify , +ZFS will do a byte-to-byte comparison in case of two blocks having the same +signature to make sure the block contents are identical. +Specifying +.Sy verify +is mandatory for the +.Sy edonr +algorithm. +.Pp +Unless necessary, deduplication should +.Em not +be enabled on a system. +See the +.Sx Deduplication +section of +.Xr zfsconcepts 7 . +.It Xo +.Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns +.Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k +.Xc +Specifies a compatibility mode or literal value for the size of dnodes in the +file system. +The default value is +.Sy legacy . +Setting this property to a value other than +.Sy legacy No requires the Sy large_dnode No pool feature to be enabled. +.Pp +Consider setting +.Sy dnodesize +to +.Sy auto +if the dataset uses the +.Sy xattr Ns = Ns Sy sa +property setting and the workload makes heavy use of extended attributes. +This +may be applicable to SELinux-enabled systems, Lustre servers, and Samba +servers, for example. +Literal values are supported for cases where the optimal +size is known in advance and for performance testing. +.Pp +Leave +.Sy dnodesize +set to +.Sy legacy +if you need to receive a send stream of this dataset on a pool that doesn't +enable the +.Sy large_dnode +feature, or if you need to import this pool on a system that doesn't support the +.Sy large_dnode No feature. +.Pp +This property can also be referred to by its shortened column name, +.Sy dnsize . +.It Xo +.Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns +.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns +.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm +.Xc +Controls the encryption cipher suite (block cipher, key length, and mode) used +for this dataset. +Requires the +.Sy encryption +feature to be enabled on the pool. +Requires a +.Sy keyformat +to be set at dataset creation time. +.Pp +Selecting +.Sy encryption Ns = Ns Sy on +when creating a dataset indicates that the default encryption suite will be +selected, which is currently +.Sy aes-256-gcm . +In order to provide consistent data protection, encryption must be specified at +dataset creation time and it cannot be changed afterwards. +.Pp +For more details and caveats about encryption see the +.Sx Encryption +section of +.Xr zfs-load-key 8 . +.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase +Controls what format the user's encryption key will be provided as. +This property is only set when the dataset is encrypted. +.Pp +Raw keys and hex keys must be 32 bytes long (regardless of the chosen +encryption suite) and must be randomly generated. 
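+For example, a suitable hex key could be generated as follows
+.Po a sketch assuming the
+.Nm openssl
+utility is available; any cryptographically secure source of 32 random bytes,
+hex-encoded, works equally well
+.Pc :
+.Dl # Nm openssl Cm rand Fl hex Sy 32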
+A raw key can be generated with the following command: +.Dl # Nm dd Sy if=/dev/urandom bs=32 count=1 Sy of= Ns Pa /path/to/output/key +.Pp +Passphrases must be between 8 and 512 bytes long and will be processed through +PBKDF2 before being used (see the +.Sy pbkdf2iters +property). +Even though the encryption suite cannot be changed after dataset creation, +the keyformat can be with +.Nm zfs Cm change-key . +.It Xo +.Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Ar /absolute/file/path Ns | Ns Sy https:// Ns Ar address Ns | Ns Sy http:// Ns Ar address +.Xc +Controls where the user's encryption key will be loaded from by default for +commands such as +.Nm zfs Cm load-key +and +.Nm zfs Cm mount Fl l . +This property is only set for encrypted datasets which are encryption roots. +If unspecified, the default is +.Sy prompt . +.Pp +Even though the encryption suite cannot be changed after dataset creation, the +keylocation can be with either +.Nm zfs Cm set +or +.Nm zfs Cm change-key . +If +.Sy prompt +is selected ZFS will ask for the key at the command prompt when it is required +to access the encrypted data (see +.Nm zfs Cm load-key +for details). +This setting will also allow the key to be passed in via the standard input stream, +but users should be careful not to place keys which should be kept secret on +the command line. +If a file URI is selected, the key will be loaded from the +specified absolute file path. +If an HTTPS or HTTP URL is selected, it will be GETted using +.Xr fetch 3 , +libcurl, or nothing, depending on compile-time configuration and run-time availability. +The +.Sy SSL_CA_CERT_FILE +environment variable can be set to set the location +of the concatenated certificate store. +The +.Sy SSL_CA_CERT_PATH +environment variable can be set to override the location +of the directory containing the certificate authority bundle. +The +.Sy SSL_CLIENT_CERT_FILE +and +.Sy SSL_CLIENT_KEY_FILE +environment variables can be set to configure the path +to the client certificate and its key. +.It Sy pbkdf2iters Ns = Ns Ar iterations +Controls the number of PBKDF2 iterations that a +.Sy passphrase +encryption key should be run through when processing it into an encryption key. +This property is only defined when encryption is enabled and a keyformat of +.Sy passphrase +is selected. +The goal of PBKDF2 is to significantly increase the +computational difficulty needed to brute force a user's passphrase. +This is accomplished by forcing the attacker to run each passphrase through a +computationally expensive hashing function many times before they arrive at the +resulting key. +A user who actually knows the passphrase will only have to pay this cost once. +As CPUs become better at processing, this number should be +raised to ensure that a brute force attack is still not possible. +The current default is +.Sy 350000 +and the minimum is +.Sy 100000 . +This property may be changed with +.Nm zfs Cm change-key . +.It Sy exec Ns = Ns Sy on Ns | Ns Sy off +Controls whether processes can be executed from within this file system. +The default value is +.Sy on . +The values +.Sy on +and +.Sy off +are equivalent to the +.Sy exec +and +.Sy noexec +mount options. +.It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none +Limits the number of filesystems and volumes that can exist under this point in +the dataset tree. +The limit is not enforced if the user is allowed to change the limit. 
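+For example, a limit of 100 filesystems and volumes could be set on a
+hypothetical dataset named
+.Ar pool/projects
+like so:
+.Dl # Nm zfs Cm set Sy filesystem_limit=100 Ar pool/projects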
+Setting a
+.Sy filesystem_limit
+on a descendent of a filesystem that already has a
+.Sy filesystem_limit
+does not override the ancestor's
+.Sy filesystem_limit ,
+but rather imposes an additional limit.
+This feature must be enabled to be used
+.Po see
+.Xr zpool-features 7
+.Pc .
+.It Sy special_small_blocks Ns = Ns Ar size
+This value represents the threshold block size for including small file
+blocks into the special allocation class.
+Blocks smaller than or equal to this
+value will be assigned to the special allocation class, while greater blocks
+will be assigned to the regular class.
+Valid values are zero or a power of two from 512B up to 1M.
+The default size is 0, which means no small file blocks
+will be allocated in the special class.
+.Pp
+Before setting this property, a special class vdev must be added to the
+pool.
+See
+.Xr zpoolconcepts 7
+for more details on the special allocation class.
+.It Sy mountpoint Ns = Ns Pa path Ns | Ns Sy none Ns | Ns Sy legacy
+Controls the mount point used for this file system.
+See the
+.Sx Mount Points
+section of
+.Xr zfsconcepts 7
+for more information on how this property is used.
+.Pp
+When the
+.Sy mountpoint
+property is changed for a file system, the file system and any children that
+inherit the mount point are unmounted.
+If the new value is
+.Sy legacy ,
+then they remain unmounted.
+Otherwise, they are automatically remounted in the new location if the property
+was previously
+.Sy legacy
+or
+.Sy none ,
+or if they were mounted before the property was changed.
+In addition, any shared file systems are unshared and shared in the new
+location.
+.It Sy nbmand Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the file system should be mounted with
+.Sy nbmand
+.Pq Non-blocking mandatory locks .
+This is used for SMB clients.
+Changes to this property only take effect when the file system is unmounted and
+remounted.
+Support for these locks is scarce and not described by POSIX.
+.It Sy overlay Ns = Ns Sy on Ns | Ns Sy off
+Allow mounting on a busy directory or a directory which already contains
+files or directories.
+This is the default mount behavior for Linux and
+.Fx
+file systems.
+On these platforms, the property is
+.Sy on
+by default.
+Set to
+.Sy off
+to disable overlay mounts for consistency with OpenZFS on other platforms.
+.It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata
+Controls what is cached in the primary cache
+.Pq ARC .
+If this property is set to
+.Sy all ,
+then both user data and metadata are cached.
+If this property is set to
+.Sy none ,
+then neither user data nor metadata is cached.
+If this property is set to
+.Sy metadata ,
+then only metadata is cached.
+The default value is
+.Sy all .
+.It Sy quota Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space a dataset and its descendents can consume.
+This property enforces a hard limit on the amount of space used.
+This includes all space consumed by descendents, including file systems and
+snapshots.
+Setting a quota on a descendent of a dataset that already has a quota does not
+override the ancestor's quota, but rather imposes an additional limit.
+.Pp
+Quotas cannot be set on volumes, as the
+.Sy volsize
+property acts as an implicit quota.
+.It Sy snapshot_limit Ns = Ns Ar count Ns | Ns Sy none
+Limits the number of snapshots that can be created on a dataset and its
+descendents.
+Setting a
+.Sy snapshot_limit
+on a descendent of a dataset that already has a
+.Sy snapshot_limit
+does not override the ancestor's
+.Sy snapshot_limit ,
+but rather imposes an additional limit.
+The limit is not enforced if the user is allowed to change the limit.
+For example, this means that recursive snapshots taken from the global zone are
+counted against each delegated dataset within a zone.
+This feature must be enabled to be used
+.Po see
+.Xr zpool-features 7
+.Pc .
+.It Sy userquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space consumed by the specified user.
+User space consumption is identified by the
+.Sy userspace@ Ns Ar user
+property.
+.Pp
+Enforcement of user quotas may be delayed by several seconds.
+This delay means that a user might exceed their quota before the system notices
+that they are over quota and begins to refuse additional writes with the
+.Er EDQUOT
+error message.
+See the
+.Nm zfs Cm userspace
+command for more information.
+.Pp
+Unprivileged users can only access their own quota.
+The root user, or a user who has been granted the
+.Sy userquota
+privilege with
+.Nm zfs Cm allow ,
+can get and set everyone's quota.
+.Pp
+This property is not available on volumes, on file systems before version 4, or
+on pools before version 15.
+The
+.Sy userquota@ Ns Ar ...
+properties are not displayed by
+.Nm zfs Cm get Sy all .
+The user's name must be appended after the
+.Sy @
+symbol, using one of the following forms:
+.Bl -bullet -compact -offset 4n
+.It
+POSIX name
+.Pq Qq joe
+.It
+POSIX numeric ID
+.Pq Qq 789
+.It
+SID name
+.Pq Qq joe.smith@mydomain
+.It
+SID numeric ID
+.Pq Qq S-1-123-456-789
+.El
+.Pp
+Files created on Linux always have POSIX owners.
+.It Sy userobjquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none
+The
+.Sy userobjquota
+is similar to
+.Sy userquota
+but it limits the number of objects a user can create.
+Please refer to
+.Sy userobjused
+for more information about how objects are counted.
+.It Sy groupquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space consumed by the specified group.
+Group space consumption is identified by the
+.Sy groupused@ Ns Ar group
+property.
+.Pp
+Unprivileged users can access only their own groups' space usage.
+The root user, or a user who has been granted the
+.Sy groupquota
+privilege with
+.Nm zfs Cm allow ,
+can get and set all groups' quotas.
+.It Sy groupobjquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none
+The
+.Sy groupobjquota
+is similar to
+.Sy groupquota
+but it limits the number of objects a group can consume.
+Please refer to
+.Sy userobjused
+for more information about how objects are counted.
+.It Sy projectquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space consumed by the specified project.
+Project space consumption is identified by the
+.Sy projectused@ Ns Ar project
+property.
+Please refer to
+.Sy projectused
+for more information about how the project is identified and set or changed.
+.Pp
+The root user, or a user who has been granted the
+.Sy projectquota
+privilege with
+.Nm zfs Cm allow ,
+can access all projects' quotas.
+.It Sy projectobjquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none
+The
+.Sy projectobjquota
+is similar to
+.Sy projectquota
+but it limits the number of objects a project can consume.
+Please refer to
+.Sy userobjused
+for more information about how objects are counted.
+.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off
+Controls whether this dataset can be modified.
+The default value is +.Sy off . +The values +.Sy on +and +.Sy off +are equivalent to the +.Sy ro +and +.Sy rw +mount options. +.Pp +This property can also be referred to by its shortened column name, +.Sy rdonly . +.It Sy recordsize Ns = Ns Ar size +Specifies a suggested block size for files in the file system. +This property is designed solely for use with database workloads that access +files in fixed-size records. +ZFS automatically tunes block sizes according to internal algorithms optimized +for typical access patterns. +.Pp +For databases that create very large files but access them in small random +chunks, these algorithms may be suboptimal. +Specifying a +.Sy recordsize +greater than or equal to the record size of the database can result in +significant performance gains. +Use of this property for general purpose file systems is strongly discouraged, +and may adversely affect performance. +.Pp +The size specified must be a power of two greater than or equal to +.Ar 512B +and less than or equal to +.Ar 128kB . +If the +.Sy large_blocks +feature is enabled on the pool, the size may be up to +.Ar 1MB . +See +.Xr zpool-features 7 +for details on ZFS feature flags. +.Pp +Changing the file system's +.Sy recordsize +affects only files created afterward; existing files are unaffected. +.Pp +This property can also be referred to by its shortened column name, +.Sy recsize . +.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most +Controls what types of metadata are stored redundantly. +ZFS stores an extra copy of metadata, so that if a single block is corrupted, +the amount of user data lost is limited. +This extra copy is in addition to any redundancy provided at the pool level +.Pq e.g. by mirroring or RAID-Z , +and is in addition to an extra copy specified by the +.Sy copies +property +.Pq up to a total of 3 copies . +For example if the pool is mirrored, +.Sy copies Ns = Ns 2 , +and +.Sy redundant_metadata Ns = Ns Sy most , +then ZFS stores 6 copies of most metadata, and 4 copies of data and some +metadata. +.Pp +When set to +.Sy all , +ZFS stores an extra copy of all metadata. +If a single on-disk block is corrupt, at worst a single block of user data +.Po which is +.Sy recordsize +bytes long +.Pc +can be lost. +.Pp +When set to +.Sy most , +ZFS stores an extra copy of most types of metadata. +This can improve performance of random writes, because less metadata must be +written. +In practice, at worst about 100 blocks +.Po of +.Sy recordsize +bytes each +.Pc +of user data can be lost if a single on-disk block is corrupt. +The exact behavior of which metadata blocks are stored redundantly may change in +future releases. +.Pp +The default value is +.Sy all . +.It Sy refquota Ns = Ns Ar size Ns | Ns Sy none +Limits the amount of space a dataset can consume. +This property enforces a hard limit on the amount of space used. +This hard limit does not include space used by descendents, including file +systems and snapshots. +.It Sy refreservation Ns = Ns Ar size Ns | Ns Sy none Ns | Ns Sy auto +The minimum amount of space guaranteed to a dataset, not including its +descendents. +When the amount of space used is below this value, the dataset is treated as if +it were taking up the amount of space specified by +.Sy refreservation . +The +.Sy refreservation +reservation is accounted for in the parent datasets' space used, and counts +against the parent datasets' quotas and reservations. 
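+.Pp
+For example, 10 GiB could be guaranteed to a hypothetical file system
+.Ar pool/home/alice ,
+exclusive of its snapshots and descendents, like so:
+.Dl # Nm zfs Cm set Sy refreservation=10G Ar pool/home/alice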
+.Pp
+If
+.Sy refreservation
+is set, a snapshot is only allowed if there is enough free pool space outside of
+this reservation to accommodate the current number of
+.Qq referenced
+bytes in the dataset.
+.Pp
+If
+.Sy refreservation
+is set to
+.Sy auto ,
+a volume is thick provisioned
+.Po or
+.Qq not sparse
+.Pc .
+.Sy refreservation Ns = Ns Sy auto
+is only supported on volumes.
+See
+.Sy volsize
+in the
+.Sx Native Properties
+section for more information about sparse volumes.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy refreserv .
+.It Sy relatime Ns = Ns Sy on Ns | Ns Sy off
+Controls the manner in which the access time is updated when
+.Sy atime Ns = Ns Sy on
+is set.
+Turning this property on causes the access time to be updated relative
+to the modify or change time.
+Access time is only updated if the previous
+access time was earlier than the current modify or change time or if the
+existing access time hasn't been updated within the past 24 hours.
+The default value is
+.Sy off .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy relatime
+and
+.Sy norelatime
+mount options.
+.It Sy reservation Ns = Ns Ar size Ns | Ns Sy none
+The minimum amount of space guaranteed to a dataset and its descendants.
+When the amount of space used is below this value, the dataset is treated as if
+it were taking up the amount of space specified by its reservation.
+Reservations are accounted for in the parent datasets' space used, and count
+against the parent datasets' quotas and reservations.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy reserv .
+.It Sy secondarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata
+Controls what is cached in the secondary cache
+.Pq L2ARC .
+If this property is set to
+.Sy all ,
+then both user data and metadata are cached.
+If this property is set to
+.Sy none ,
+then neither user data nor metadata is cached.
+If this property is set to
+.Sy metadata ,
+then only metadata is cached.
+The default value is
+.Sy all .
+.It Sy setuid Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the setuid bit is respected for the file system.
+The default value is
+.Sy on .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy suid
+and
+.Sy nosuid
+mount options.
+.It Sy sharesmb Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts
+Controls whether the file system is shared by using
+.Sy Samba USERSHARES
+and what options are to be used.
+Otherwise, the file system is automatically shared and unshared with the
+.Nm zfs Cm share
+and
+.Nm zfs Cm unshare
+commands.
+If the property is set to
+.Sy on ,
+the
+.Xr net 8
+command is invoked to create a
+.Sy USERSHARE .
+.Pp
+Because SMB shares require a resource name, a unique resource name is
+constructed from the dataset name.
+The constructed name is a copy of the
+dataset name, except that any characters in the dataset name which would be
+invalid in the resource name are replaced with underscore (_) characters.
+Linux does not currently support additional options which might be available
+on Solaris.
+.Pp
+If the
+.Sy sharesmb
+property is set to
+.Sy off ,
+the file systems are unshared.
+.Pp
+By default, the share is created with the ACL (Access Control List) "Everyone:F"
+("F" stands for "full permissions", i.e. read and write permissions) and no
+guest access (which means Samba must be able to authenticate a real user via
+system passwd/shadow, LDAP, or smbpasswd).
+This means that any additional access control
+(such as disallowing access for specific users) must be done on the underlying
+file system.
+.It Sy sharenfs Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts
+Controls whether the file system is shared via NFS, and what options are to be
+used.
+A file system with a
+.Sy sharenfs
+property of
+.Sy off
+is managed with the
+.Xr exportfs 8
+command and entries in the
+.Pa /etc/exports
+file.
+Otherwise, the file system is automatically shared and unshared with the
+.Nm zfs Cm share
+and
+.Nm zfs Cm unshare
+commands.
+If the property is set to
+.Sy on ,
+the dataset is shared using the default options:
+.Dl sec=sys,rw,crossmnt,no_subtree_check
+.Pp
+See
+.Xr exports 5
+for the meaning of the default options.
+Otherwise, the
+.Xr exportfs 8
+command is invoked with options equivalent to the contents of this property.
+.Pp
+When the
+.Sy sharenfs
+property is changed for a dataset, the dataset and any children inheriting the
+property are re-shared with the new options, but only if the property was
+previously
+.Sy off
+or if they were shared before the property was changed.
+If the new property is
+.Sy off ,
+the file systems are unshared.
+.It Sy logbias Ns = Ns Sy latency Ns | Ns Sy throughput
+Provide a hint to ZFS about handling of synchronous requests in this dataset.
+If
+.Sy logbias
+is set to
+.Sy latency
+.Pq the default ,
+ZFS will use pool log devices
+.Pq if configured
+to handle the requests at low latency.
+If
+.Sy logbias
+is set to
+.Sy throughput ,
+ZFS will not use configured pool log devices.
+ZFS will instead optimize synchronous operations for global pool throughput and
+efficient use of resources.
+.It Sy snapdev Ns = Ns Sy hidden Ns | Ns Sy visible
+Controls whether the volume snapshot devices under
+.Pa /dev/zvol/ Ns Aq Ar pool
+are hidden or visible.
+The default value is
+.Sy hidden .
+.It Sy snapdir Ns = Ns Sy hidden Ns | Ns Sy visible
+Controls whether the
+.Pa .zfs
+directory is hidden or visible in the root of the file system as discussed in
+the
+.Sx Snapshots
+section of
+.Xr zfsconcepts 7 .
+The default value is
+.Sy hidden .
+.It Sy sync Ns = Ns Sy standard Ns | Ns Sy always Ns | Ns Sy disabled
+Controls the behavior of synchronous requests
+.Pq e.g. fsync, O_DSYNC .
+.Sy standard
+is the POSIX-specified behavior of ensuring all synchronous requests
+are written to stable storage and all devices are flushed to ensure
+data is not cached by device controllers
+.Pq this is the default .
+.Sy always
+causes every file system transaction to be written and flushed before its
+system call returns.
+This has a large performance penalty.
+.Sy disabled
+disables synchronous requests.
+File system transactions are only committed to stable storage periodically.
+This option will give the highest performance.
+However, it is very dangerous, as ZFS would be ignoring the synchronous
+transaction demands of applications such as databases or NFS.
+Administrators should only use this option when the risks are understood.
+.It Sy version Ns = Ns Ar N Ns | Ns Sy current
+The on-disk version of this file system, which is independent of the pool
+version.
+This property can only be set to later supported versions.
+See the
+.Nm zfs Cm upgrade
+command.
+.It Sy volsize Ns = Ns Ar size
+For volumes, specifies the logical size of the volume.
+By default, creating a volume establishes a reservation of equal size.
+For storage pools with a version number of 9 or higher, a
+.Sy refreservation
+is set instead.
+Any changes to
+.Sy volsize
+are reflected in an equivalent change to the reservation
+.Pq or Sy refreservation .
+The
+.Sy volsize
+can only be set to a multiple of
+.Sy volblocksize ,
+and cannot be zero.
+.Pp
+The reservation is kept equal to the volume's logical size to prevent unexpected
+behavior for consumers.
+Without the reservation, the volume could run out of space, resulting in
+undefined behavior or data corruption, depending on how the volume is used.
+These effects can also occur when the volume size is changed while it is in use
+.Pq particularly when shrinking the size .
+Extreme care should be used when adjusting the volume size.
+.Pp
+Though not recommended, a
+.Qq sparse volume
+.Po also known as
+.Qq thin provisioned
+.Pc
+can be created by specifying the
+.Fl s
+option to the
+.Nm zfs Cm create Fl V
+command, or by changing the value of the
+.Sy refreservation
+property
+.Po or
+.Sy reservation
+property on pool version 8 or earlier
+.Pc
+after the volume has been created.
+A
+.Qq sparse volume
+is a volume where the value of
+.Sy refreservation
+is less than the size of the volume plus the space required to store its
+metadata.
+Consequently, writes to a sparse volume can fail with
+.Er ENOSPC
+when the pool is low on space.
+For a sparse volume, changes to
+.Sy volsize
+are not reflected in the
+.Sy refreservation .
+A volume that is not sparse is said to be
+.Qq thick provisioned .
+A sparse volume can become thick provisioned by setting
+.Sy refreservation
+to
+.Sy auto .
+.It Sy volmode Ns = Ns Sy default Ns | Ns Sy full Ns | Ns Sy geom Ns | Ns Sy dev Ns | Ns Sy none
+This property specifies how volumes should be exposed to the OS.
+Setting it to
+.Sy full
+exposes volumes as fully fledged block devices, providing maximal
+functionality.
+The value
+.Sy geom
+is just an alias for
+.Sy full
+and is kept for compatibility.
+Setting it to
+.Sy dev
+hides the volume's partitions.
+Volumes with the property set to
+.Sy none
+are not exposed outside ZFS, but can still be snapshotted, cloned, replicated,
+and so on, which can make them suitable for backup purposes.
+The value
+.Sy default
+means that volume exposure is controlled by the system-wide tunable
+.Sy zvol_volmode ,
+where
+.Sy full ,
+.Sy dev ,
+and
+.Sy none
+are encoded as 1, 2, and 3, respectively.
+The default value is
+.Sy full .
+.It Sy vscan Ns = Ns Sy on Ns | Ns Sy off
+Controls whether regular files should be scanned for viruses when a file is
+opened and closed.
+In addition to enabling this property, the virus scan service must also be
+enabled for virus scanning to occur.
+The default value is
+.Sy off .
+This property is not used by OpenZFS.
+.It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy sa
+Controls whether extended attributes are enabled for this file system.
+Two styles of extended attributes are supported: either directory based
+or system attribute based.
+.Pp
+The default value of
+.Sy on
+enables directory based extended attributes.
+This style of extended attribute imposes no practical limit
+on either the size or number of attributes which can be set on a file,
+although under Linux the
+.Xr getxattr 2
+and
+.Xr setxattr 2
+system calls limit the maximum size to 64K.
+This is the most compatible
+style of extended attribute and is supported by all ZFS implementations.
+.Pp
+System attribute based xattrs can be enabled by setting the value to
+.Sy sa .
+The key advantage of this type of xattr is improved performance.
+Storing extended attributes as system attributes
+significantly decreases the amount of disk IO required.
+Up to 64K of data may be stored per-file in the space reserved for system attributes. +If there is not enough space available for an extended attribute +then it will be automatically written as a directory based xattr. +System attribute based extended attributes are not accessible +on platforms which do not support the +.Sy xattr Ns = Ns Sy sa +feature. +OpenZFS supports +.Sy xattr Ns = Ns Sy sa +on both +.Fx +and Linux. +.Pp +The use of system attribute based xattrs is strongly encouraged for users of +SELinux or POSIX ACLs. +Both of these features heavily rely on extended +attributes and benefit significantly from the reduced access time. +.Pp +The values +.Sy on +and +.Sy off +are equivalent to the +.Sy xattr +and +.Sy noxattr +mount options. +.It Sy jailed Ns = Ns Sy off Ns | Ns Sy on +Controls whether the dataset is managed from a jail. +See +.Xr zfs-jail 8 +for more information. +Jails are a +.Fx +feature and are not relevant on other platforms. +The default value is +.Sy off . +.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off +Controls whether the dataset is managed from a non-global zone. +Zones are a Solaris feature and are not relevant on other platforms. +The default value is +.Sy off . +.El +.Pp +The following three properties cannot be changed after the file system is +created, and therefore, should be set when the file system is created. +If the properties are not set with the +.Nm zfs Cm create +or +.Nm zpool Cm create +commands, these properties are inherited from the parent dataset. +If the parent dataset lacks these properties due to having been created prior to +these features being supported, the new file system will have the default values +for these properties. +.Bl -tag -width "" +.It Xo +.Sy casesensitivity Ns = Ns Sy sensitive Ns | Ns +.Sy insensitive Ns | Ns Sy mixed +.Xc +Indicates whether the file name matching algorithm used by the file system +should be case-sensitive, case-insensitive, or allow a combination of both +styles of matching. +The default value for the +.Sy casesensitivity +property is +.Sy sensitive . +Traditionally, +.Ux +and POSIX file systems have case-sensitive file names. +.Pp +The +.Sy mixed +value for the +.Sy casesensitivity +property indicates that the file system can support requests for both +case-sensitive and case-insensitive matching behavior. +Currently, case-insensitive matching behavior on a file system that supports +mixed behavior is limited to the SMB server product. +For more information about the +.Sy mixed +value behavior, see the "ZFS Administration Guide". +.It Xo +.Sy normalization Ns = Ns Sy none Ns | Ns Sy formC Ns | Ns +.Sy formD Ns | Ns Sy formKC Ns | Ns Sy formKD +.Xc +Indicates whether the file system should perform a +.Sy unicode +normalization of file names whenever two file names are compared, and which +normalization algorithm should be used. +File names are always stored unmodified, names are normalized as part of any +comparison process. +If this property is set to a legal value other than +.Sy none , +and the +.Sy utf8only +property was left unspecified, the +.Sy utf8only +property is automatically set to +.Sy on . +The default value of the +.Sy normalization +property is +.Sy none . +This property cannot be changed after the file system is created. +.It Sy utf8only Ns = Ns Sy on Ns | Ns Sy off +Indicates whether the file system should reject file names that include +characters that are not present in the +.Sy UTF-8 +character code set. 
+If this property is explicitly set to +.Sy off , +the normalization property must either not be explicitly set or be set to +.Sy none . +The default value for the +.Sy utf8only +property is +.Sy off . +This property cannot be changed after the file system is created. +.El +.Pp +The +.Sy casesensitivity , +.Sy normalization , +and +.Sy utf8only +properties are also new permissions that can be assigned to non-privileged users +by using the ZFS delegated administration feature. +. +.Ss Temporary Mount Point Properties +When a file system is mounted, either through +.Xr mount 8 +for legacy mounts or the +.Nm zfs Cm mount +command for normal file systems, its mount options are set according to its +properties. +The correlation between properties and mount options is as follows: +.Bl -tag -compact -offset Ds -width "rootcontext=" +.It Sy atime +atime/noatime +.It Sy canmount +auto/noauto +.It Sy devices +dev/nodev +.It Sy exec +exec/noexec +.It Sy readonly +ro/rw +.It Sy relatime +relatime/norelatime +.It Sy setuid +suid/nosuid +.It Sy xattr +xattr/noxattr +.It Sy nbmand +mand/nomand +.It Sy context Ns = +context= +.It Sy fscontext Ns = +fscontext= +.It Sy defcontext Ns = +defcontext= +.It Sy rootcontext Ns = +rootcontext= +.El +.Pp +In addition, these options can be set on a per-mount basis using the +.Fl o +option, without affecting the property that is stored on disk. +The values specified on the command line override the values stored in the +dataset. +The +.Sy nosuid +option is an alias for +.Sy nodevices , Ns Sy nosetuid . +These properties are reported as +.Qq temporary +by the +.Nm zfs Cm get +command. +If the properties are changed while the dataset is mounted, the new setting +overrides any temporary settings. +. +.Ss User Properties +In addition to the standard native properties, ZFS supports arbitrary user +properties. +User properties have no effect on ZFS behavior, but applications or +administrators can use them to annotate datasets +.Pq file systems, volumes, and snapshots . +.Pp +User property names must contain a colon +.Pq Qq Sy \&: +character to distinguish them from native properties. +They may contain lowercase letters, numbers, and the following punctuation +characters: colon +.Pq Qq Sy \&: , +dash +.Pq Qq Sy - , +period +.Pq Qq Sy \&. , +and underscore +.Pq Qq Sy _ . +The expected convention is that the property name is divided into two portions +such as +.Ar module : Ns Ar property , +but this namespace is not enforced by ZFS. +User property names can be at most 256 characters, and cannot begin with a dash +.Pq Qq Sy - . +.Pp +When making programmatic use of user properties, it is strongly suggested to use +a reversed DNS domain name for the +.Ar module +component of property names to reduce the chance that two +independently-developed packages use the same property name for different +purposes. +.Pp +The values of user properties are arbitrary strings, are always inherited, and +are never validated. +All of the commands that operate on properties +.Po Nm zfs Cm list , +.Nm zfs Cm get , +.Nm zfs Cm set , +and so forth +.Pc +can be used to manipulate both native properties and user properties. +Use the +.Nm zfs Cm inherit +command to clear a user property. +If the property is not defined in any parent dataset, it is removed entirely. +Property values are limited to 8192 bytes. 
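+.Pp
+For example, a user property following the reversed-DNS convention
+.Pq the property name and the pool Ar tank No here are purely illustrative
+can be set, queried, and cleared as follows:
+.Dl # Nm zfs Cm set Ar com.example : Ns Ar department Ns = Ns Ar 12345 Ar tank/accounting
+.Dl # Nm zfs Cm get Ar com.example : Ns Ar department Ar tank/accounting
+.Dl # Nm zfs Cm inherit Ar com.example : Ns Ar department Ar tank/accounting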
diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 new file mode 100644 index 000000000000..83ca91175370 --- /dev/null +++ b/man/man7/zpool-features.7 @@ -0,0 +1,842 @@ +.\" +.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. +.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. +.\" The contents of this file are subject to the terms of the Common Development +.\" and Distribution License (the "License"). You may not use this file except +.\" in compliance with the License. You can obtain a copy of the license at +.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. +.\" +.\" See the License for the specific language governing permissions and +.\" limitations under the License. When distributing Covered Code, include this +.\" CDDL HEADER in each file and include the License file at +.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this +.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your +.\" own identifying information: +.\" Portions Copyright [yyyy] [name of copyright owner] +.\" Copyright (c) 2019, Klara Inc. +.\" Copyright (c) 2019, Allan Jude +.\" Copyright (c) 2021, Colm Buckley +.\" +.Dd May 31, 2021 +.Dt ZPOOL-FEATURES 7 +.Os +. +.Sh NAME +.Nm zpool-features +.Nd description of ZFS pool features +. +.Sh DESCRIPTION +ZFS pool on-disk format versions are specified via "features" which replace +the old on-disk format numbers (the last supported on-disk format number is 28). +To enable a feature on a pool use the +.Nm zpool Cm upgrade , +or set the +.Sy feature Ns @ Ns Ar feature-name +property to +.Sy enabled . +Please also see the +.Sx Compatibility feature sets +section for information on how sets of features may be enabled together. +.Pp +The pool format does not affect file system version compatibility or the ability +to send file systems between pools. +.Pp +Since most features can be enabled independently of each other, the on-disk +format of the pool is specified by the set of all features marked as +.Sy active +on the pool. +If the pool was created by another software version +this set may include unsupported features. +. +.Ss Identifying features +Every feature has a GUID of the form +.Ar com.example : Ns Ar feature-name . +The reversed DNS name ensures that the feature's GUID is unique across all ZFS +implementations. +When unsupported features are encountered on a pool they will +be identified by their GUIDs. +Refer to the documentation for the ZFS +implementation that created the pool for information about those features. +.Pp +Each supported feature also has a short name. +By convention a feature's short name is the portion of its GUID which follows the +.Sq \&: +(i.e. +.Ar com.example : Ns Ar feature-name +would have the short name +.Ar feature-name ) , +however a feature's short name may differ across ZFS implementations if +following the convention would result in name conflicts. +. +.Ss Feature states +Features can be in one of three states: +.Bl -tag -width "disabled" +.It Sy active +This feature's on-disk format changes are in effect on the pool. +Support for this feature is required to import the pool in read-write mode. +If this feature is not read-only compatible, +support is also required to import the pool in read-only mode +.Pq see Sx Read-only compatibility . 
+.It Sy enabled
+An administrator has marked this feature as enabled on the pool, but the
+feature's on-disk format changes have not been made yet.
+The pool can still be imported by software that does not support this feature,
+but changes may be made to the on-disk format at any time
+which will move the feature to the
+.Sy active
+state.
+Some features may support returning to the
+.Sy enabled
+state after becoming
+.Sy active .
+See feature-specific documentation for details.
+.It Sy disabled
+This feature's on-disk format changes have not been made and will not be made
+unless an administrator moves the feature to the
+.Sy enabled
+state.
+Features cannot be disabled once they have been enabled.
+.El
+.Pp
+The state of supported features is exposed through pool properties of the form
+.Sy feature Ns @ Ns Ar short-name .
+.
+.Ss Read-only compatibility
+Some features may make on-disk format changes that do not interfere with other
+software's ability to read from the pool.
+These features are referred to as
+.Dq read-only compatible .
+If all unsupported features on a pool are read-only compatible,
+the pool can be imported in read-only mode by setting the
+.Sy readonly
+property during import (see
+.Xr zpool-import 8
+for details on importing pools).
+.
+.Ss Unsupported features
+For each unsupported feature enabled on an imported pool, a pool property
+named
+.Sy unsupported Ns @ Ns Ar feature-name
+will indicate why the import was allowed despite the unsupported feature.
+Possible values for this property are:
+.Bl -tag -width "readonly"
+.It Sy inactive
+The feature is in the
+.Sy enabled
+state and therefore the pool's on-disk
+format is still compatible with software that does not support this feature.
+.It Sy readonly
+The feature is read-only compatible and the pool has been imported in
+read-only mode.
+.El
+.
+.Ss Feature dependencies
+Some features depend on other features being enabled in order to function.
+Enabling a feature will automatically enable any features it depends on.
+.
+.Ss Compatibility feature sets
+It is sometimes necessary for a pool to maintain compatibility with a
+specific on-disk format, by enabling and disabling particular features.
+The
+.Sy compatibility
+property facilitates this by allowing feature sets to be read from text files.
+When set to
+.Sy off
+(the default), compatibility feature sets are disabled
+(i.e. all features are enabled); when set to
+.Sy legacy ,
+no features are enabled.
+When set to a comma-separated list of filenames
+(each filename may either be an absolute path, or relative to
+.Pa /etc/zfs/compatibility.d
+or
+.Pa /usr/share/zfs/compatibility.d ) ,
+the lists of requested features are read from those files,
+separated by whitespace and/or commas.
+Only features present in all files are enabled.
+.Pp
+Simple sanity checks are applied to the files:
+they must be between 1B and 16kB in size, and must end with a newline character.
+.Pp
+The requested features are applied when a pool is created using
+.Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar …
+and control which features are enabled when using
+.Nm zpool Cm upgrade .
+.Nm zpool Cm status
+will not show a warning about disabled features which are not part
+of the requested feature set.
+.Pp
+The special value
+.Sy legacy
+prevents any features from being enabled, either via
+.Nm zpool Cm upgrade
+or
+.Nm zpool Cm set Sy feature Ns @ Ns Ar feature-name Ns = Ns Sy enabled .
+This setting also prevents pools from being upgraded to newer on-disk versions.
+This is a safety measure to prevent new features from being +accidentally enabled, breaking compatibility. +.Pp +By convention, compatibility files in +.Pa /usr/share/zfs/compatibility.d +are provided by the distribution, and include feature sets +supported by important versions of popular distributions, and feature +sets commonly supported at the start of each year. +Compatibility files in +.Pa /etc/zfs/compatibility.d , +if present, will take precedence over files with the same name in +.Pa /usr/share/zfs/compatibility.d . +.Pp +If an unrecognized feature is found in these files, an error message will +be shown. +If the unrecognized feature is in a file in +.Pa /etc/zfs/compatibility.d , +this is treated as an error and processing will stop. +If the unrecognized feature is under +.Pa /usr/share/zfs/compatibility.d , +this is treated as a warning and processing will continue. +This difference is to allow distributions to include features +which might not be recognized by the currently-installed binaries. +.Pp +Compatibility files may include comments: +any text from +.Sq # +to the end of the line is ignored. +.Pp +.Sy Example : +.Bd -literal -compact -offset 4n +.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2 +# Features which are supported by GRUB2 +async_destroy +bookmarks +embedded_data +empty_bpobj +enabled_txg +extensible_dataset +filesystem_limits +hole_birth +large_blocks +lz4_compress +spacemap_histogram + +.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev +.Ed +.Pp +See +.Xr zpool-create 8 +and +.Xr zpool-upgrade 8 +for more information on how these commands are affected by feature sets. +. +.de feature +.It Sy \\$2 +.Bl -tag -compact -width "READ-ONLY COMPATIBLE" +.It GUID +.Sy \\$1:\\$2 +.if !"\\$4"" \{\ +.It DEPENDENCIES +\fB\\$4\fP\c +.if !"\\$5"" , \fB\\$5\fP\c +.if !"\\$6"" , \fB\\$6\fP\c +.if !"\\$7"" , \fB\\$7\fP\c +.if !"\\$8"" , \fB\\$8\fP\c +.if !"\\$9"" , \fB\\$9\fP\c +.\} +.It READ-ONLY COMPATIBLE +\\$3 +.El +.Pp +.. +. +.ds instant-never \ +.No This feature becomes Sy active No as soon as it is enabled \ +and will never return to being Sy enabled . +. +.ds remount-upgrade \ +.No Each filesystem will be upgraded automatically when remounted, \ +or when a new file is created under that filesystem. \ +The upgrade can also be triggered on filesystems via \ +Nm zfs Cm set Sy version Ns = Ns Sy current Ar fs . \ +No The upgrade process runs in the background and may take a while to complete \ +for filesystems containing large amounts of files. +. +.de checksum-spiel +When the +.Sy \\$1 +feature is set to +.Sy enabled , +the administrator can turn on the +.Sy \\$1 +checksum on any dataset using +.Nm zfs Cm set Sy checksum Ns = Ns Sy \\$1 Ar dset +.Po see Xr zfs-set 8 Pc . +This feature becomes +.Sy active +once a +.Sy checksum +property has been set to +.Sy \\$1 , +and will return to being +.Sy enabled +once all filesystems that have ever had their checksum set to +.Sy \\$1 +are destroyed. +.. +. +.Sh FEATURES +The following features are supported on this system: +.Bl -tag -width Ds +.feature org.zfsonlinux allocation_classes yes +This feature enables support for separate allocation classes. +.Pp +This feature becomes +.Sy active +when a dedicated allocation class vdev (dedup or special) is created with the +.Nm zpool Cm create No or Nm zpool Cm add No commands . +With device removal, it can be returned to the +.Sy enabled +state if all the dedicated allocation class vdevs are removed. +. 
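+.Pp
+For example
+.Pq with illustrative device names ,
+a pool with a mirrored special allocation class vdev can be created with:
+.Dl # Nm zpool Cm create Ar pool Sy mirror Ar sda sdb Sy special mirror Ar sdc sdd
+.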
+.feature com.delphix async_destroy yes
+Destroying a file system requires traversing all of its data in order to
+return its used space to the pool.
+Without
+.Sy async_destroy ,
+the file system is not fully removed until all space has been reclaimed.
+If the destroy operation is interrupted by a reboot or power outage,
+the next attempt to open the pool will need to complete the destroy
+operation synchronously.
+.Pp
+When
+.Sy async_destroy
+is enabled, the file system's data will be reclaimed by a background process,
+allowing the destroy operation to complete
+without traversing the entire file system.
+The background process is able to resume
+interrupted destroys after the pool has been opened, eliminating the need
+to finish interrupted destroys as part of the open operation.
+The amount of space remaining to be reclaimed by the background process
+is available through the
+.Sy freeing
+property.
+.Pp
+This feature is only
+.Sy active
+while
+.Sy freeing
+is non-zero.
+.
+.feature com.delphix bookmarks yes extensible_dataset
+This feature enables use of the
+.Nm zfs Cm bookmark
+command.
+.Pp
+This feature is
+.Sy active
+while any bookmarks exist in the pool.
+All bookmarks in the pool can be listed by running
+.Nm zfs Cm list Fl t Sy bookmark Fl r Ar poolname .
+.
+.feature com.datto bookmark_v2 no bookmark extensible_dataset
+This feature enables the creation and management of larger bookmarks which are
+needed for other features in ZFS.
+.Pp
+This feature becomes
+.Sy active
+when a v2 bookmark is created and will be returned to the
+.Sy enabled
+state when all v2 bookmarks are destroyed.
+.
+.feature com.delphix bookmark_written no bookmark extensible_dataset bookmark_v2
+This feature enables additional bookmark accounting fields, enabling the
+.Sy written Ns # Ns Ar bookmark
+property (space written since a bookmark) and estimates of
+send stream sizes for incrementals from bookmarks.
+.Pp
+This feature becomes
+.Sy active
+when a bookmark is created and will be
+returned to the
+.Sy enabled
+state when all bookmarks with these fields are destroyed.
+.
+.feature org.openzfs device_rebuild yes
+This feature enables the ability for the
+.Nm zpool Cm attach
+and
+.Nm zpool Cm replace
+commands to perform sequential reconstruction
+(instead of healing reconstruction) when resilvering.
+.Pp
+Sequential reconstruction resilvers a device in LBA order without immediately
+verifying the checksums.
+Once complete, a scrub is started, which then verifies the checksums.
+This approach allows full redundancy to be restored to the pool
+in the minimum amount of time.
+This two-phase approach will take longer than a healing resilver
+when the time to verify the checksums is included.
+However, unless there is additional pool damage,
+no checksum errors should be reported by the scrub.
+This feature is incompatible with raidz configurations.
+.Pp
+This feature becomes
+.Sy active
+while a sequential resilver is in progress, and returns to
+.Sy enabled
+when the resilver completes.
+.
+.feature com.delphix device_removal no
+This feature enables the
+.Nm zpool Cm remove
+command to remove top-level vdevs,
+evacuating them to reduce the total size of the pool.
+.Pp
+This feature becomes
+.Sy active
+when the
+.Nm zpool Cm remove
+command is used
+on a top-level vdev, and will never return to being
+.Sy enabled .
+.
+.feature org.openzfs draid no
+This feature enables use of the
+.Sy draid
+vdev type.
+dRAID is a variant of raidz which provides integrated distributed
+hot spares that allow faster resilvering while retaining the benefits of raidz.
+Data, parity, and spare space are organized in redundancy groups
+and distributed evenly over all of the devices.
+.Pp
+This feature becomes
+.Sy active
+when creating a pool which uses the
+.Sy draid
+vdev type, or when adding a new
+.Sy draid
+vdev to an existing pool.
+.
+.feature org.illumos edonr no extensible_dataset
+This feature enables the use of the Edon-R hash algorithm for checksum,
+including for nopwrite (if compression is also enabled, an overwrite of
+a block whose checksum matches the data being written will be ignored).
+In an abundance of caution, Edon-R requires verification when used with
+dedup:
+.Nm zfs Cm set Sy dedup Ns = Ns Sy edonr , Ns Sy verify
+.Po see Xr zfs-set 8 Pc .
+.Pp
+Edon-R is a very high-performance hash algorithm that was part
+of the NIST SHA-3 competition.
+It provides extremely high hash performance (over 350% faster than SHA-256),
+but was not selected because of its unsuitability
+as a general purpose secure hash algorithm.
+This implementation utilizes the new salted checksumming functionality
+in ZFS, which means that the checksum is pre-seeded with a secret
+256-bit random key (stored on the pool) before being fed the data block
+to be checksummed.
+Thus the produced checksums are unique to a given pool,
+preventing hash collision attacks on systems with dedup.
+.Pp
+.checksum-spiel edonr
+.Pp
+.Fx does not support the Sy edonr No feature.
+.
+.feature com.delphix embedded_data no
+This feature improves the performance and compression ratio of
+highly-compressible blocks.
+Blocks whose contents can compress to 112 bytes
+or smaller can take advantage of this feature.
+.Pp
+When this feature is enabled, the contents of highly-compressible blocks are
+stored in the block "pointer" itself (a misnomer in this case, as it contains
+the compressed data, rather than a pointer to its location on disk).
+Thus the space of the block (one sector, typically 512B or 4kB) is saved,
+and no additional I/O is needed to read and write the data block.
+.Pp
+\*[instant-never]
+.
+.feature com.delphix empty_bpobj yes
+This feature increases the performance of creating and using a large
+number of snapshots of a single filesystem or volume, and also reduces
+the disk space required.
+.Pp
+When there are many snapshots, each snapshot uses many Block Pointer
+Objects (bpobjs) to track blocks associated with that snapshot.
+However, in common use cases, most of these bpobjs are empty.
+This feature allows us to create each bpobj on-demand,
+thus eliminating the empty bpobjs.
+.Pp
+This feature is
+.Sy active
+while there are any filesystems, volumes,
+or snapshots which were created after enabling this feature.
+.
+.feature com.delphix enabled_txg yes
+Once this feature is enabled, ZFS records the transaction group number
+in which new features are enabled.
+This has no user-visible impact, but other features may depend on this feature.
+.Pp
+This feature becomes
+.Sy active
+as soon as it is enabled and will
+never return to being
+.Sy enabled .
+.
+.feature com.datto encryption no bookmark_v2 extensible_dataset
+This feature enables the creation and management of natively encrypted datasets.
+.Pp
+This feature becomes
+.Sy active
+when an encrypted dataset is created and will be returned to the
+.Sy enabled
+state when all datasets that use this feature are destroyed.
+.
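+.Pp
+For example, an encrypted dataset can be created
+.Pq the pool name Ar tank No being illustrative
+with:
+.Dl # Nm zfs Cm create Fl o Sy encryption Ns = Ns Sy on Fl o Sy keyformat Ns = Ns Sy passphrase Ar tank/encrypted
+.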
+.feature com.delphix extensible_dataset no +This feature allows more flexible use of internal ZFS data structures, +and exists for other features to depend on. +.Pp +This feature will be +.Sy active +when the first dependent feature uses it, and will be returned to the +.Sy enabled +state when all datasets that use this feature are destroyed. +. +.feature com.joyent filesystem_limits yes extensible_dataset +This feature enables filesystem and snapshot limits. +These limits can be used to control how many filesystems and/or snapshots +can be created at the point in the tree on which the limits are set. +.Pp +This feature is +.Sy active +once either of the limit properties has been set on a dataset. +Once activated the feature is never deactivated. +. +.feature com.delphix hole_birth no enabled_txg +This feature has/had bugs, the result of which is that, if you do a +.Nm zfs Cm send Fl i +.Pq or Fl R , No since it uses Fl i +from an affected dataset, the receiving party will not see any checksum +or other errors, but the resulting destination snapshot +will not match the source. +Its use by +.Nm zfs Cm send Fl i +has been disabled by default +.Pq see Sy send_holes_without_birth_time No in Xr zfs 4 . +.Pp +This feature improves performance of incremental sends +.Pq Nm zfs Cm send Fl i +and receives for objects with many holes. +The most common case of hole-filled objects is zvols. +.Pp +An incremental send stream from snapshot +.Sy A No to snapshot Sy B +contains information about every block that changed between +.Sy A No and Sy B . +Blocks which did not change between those snapshots can be +identified and omitted from the stream using a piece of metadata called +the "block birth time", but birth times are not recorded for holes +(blocks filled only with zeroes). +Since holes created after +.Sy A No cannot be distinguished from holes created before Sy A , +information about every hole in the entire filesystem or zvol +is included in the send stream. +.Pp +For workloads where holes are rare this is not a problem. +However, when incrementally replicating filesystems or zvols with many holes +(for example a zvol formatted with another filesystem) a lot of time will +be spent sending and receiving unnecessary information about holes that +already exist on the receiving side. +.Pp +Once the +.Sy hole_birth +feature has been enabled the block birth times +of all new holes will be recorded. +Incremental sends between snapshots created after this feature is enabled +will use this new metadata to avoid sending information about holes that +already exist on the receiving side. +.Pp +\*[instant-never] +. +.feature org.open-zfs large_blocks no extensible_dataset +This feature allows the record size on a dataset to be set larger than 128kB. +.Pp +This feature becomes +.Sy active +once a dataset contains a file with a block size larger than 128kB, +and will return to being +.Sy enabled +once all filesystems that have ever had their recordsize larger than 128kB +are destroyed. +. +.feature org.zfsonlinux large_dnode no extensible_dataset +This feature allows the size of dnodes in a dataset to be set larger than 512B. +. +This feature becomes +.Sy active +once a dataset contains an object with a dnode larger than 512B, +which occurs as a result of setting the +.Sy dnodesize +dataset property to a value other than +.Sy legacy . +The feature will return to being +.Sy enabled +once all filesystems that have ever contained a dnode larger than 512B +are destroyed. 
+Large dnodes allow more data to be stored in the bonus buffer,
+thus potentially improving performance by avoiding the use of spill blocks.
+.
+.feature com.delphix livelist yes
+This feature allows clones to be deleted faster than the traditional method
+when a large number of random/sparse writes have been made to the clone.
+All blocks allocated and freed after a clone is created are tracked by
+the clone's livelist, which is referenced during the deletion of the clone.
+The feature is activated when a clone is created and remains
+.Sy active
+until all clones have been destroyed.
+.
+.feature com.delphix log_spacemap yes com.delphix:spacemap_v2
+This feature improves performance for heavily-fragmented pools,
+especially when workloads are heavy in random-writes.
+It does so by logging all the metaslab changes on a single spacemap every TXG
+instead of scattering multiple writes to all the metaslab spacemaps.
+.Pp
+\*[instant-never]
+.
+.feature org.illumos lz4_compress no
+.Sy lz4
+is a high-performance real-time compression algorithm that
+features significantly faster compression and decompression as well as a
+higher compression ratio than the older
+.Sy lzjb
+compression.
+Typically,
+.Sy lz4
+compression is approximately 50% faster on compressible data and 200% faster
+on incompressible data than
+.Sy lzjb .
+It is also approximately 80% faster on decompression,
+while giving approximately a 10% better compression ratio.
+.Pp
+When the
+.Sy lz4_compress
+feature is set to
+.Sy enabled ,
+the administrator can turn on
+.Sy lz4
+compression on any dataset on the pool using the
+.Xr zfs-set 8
+command.
+All newly written metadata will be compressed with the
+.Sy lz4
+algorithm.
+.Pp
+\*[instant-never]
+.
+.feature com.joyent multi_vdev_crash_dump no
+This feature allows a dump device to be configured with a pool comprised
+of multiple vdevs.
+Those vdevs may be arranged in any mirrored or raidz configuration.
+.Pp
+When the
+.Sy multi_vdev_crash_dump
+feature is set to
+.Sy enabled ,
+the administrator can use
+.Xr dumpadm 1M
+to configure a dump device on a pool comprised of multiple vdevs.
+.Pp
+Under
+.Fx
+and Linux this feature is unused, but registered for compatibility.
+New pools created on these systems will have the feature
+.Sy enabled
+but will never transition to
+.Sy active ,
+as this functionality is not required for crash dump support.
+Existing pools where this feature is
+.Sy active
+can be imported.
+.
+.feature com.delphix obsolete_counts yes device_removal
+This feature is an enhancement of
+.Sy device_removal ,
+which will over time reduce the memory used to track removed devices.
+When indirect blocks are freed or remapped,
+we note that their part of the indirect mapping is "obsolete" – no longer needed.
+.Pp
+This feature becomes
+.Sy active
+when the
+.Nm zpool Cm remove
+command is used on a top-level vdev, and will never return to being
+.Sy enabled .
+.
+.feature org.zfsonlinux project_quota yes extensible_dataset
+This feature allows administrators to account the space and object usage
+information against the project identifier (ID).
+.Pp
+The project ID is an object-based attribute.
+When upgrading an existing filesystem,
+objects without a project ID will be assigned a zero project ID.
+When this feature is enabled, newly created objects inherit
+their parent directories' project ID if the parent's inherit flag is set
+.Pq via Nm chattr Sy [+-]P No or Nm zfs Cm project Fl s Ns | Ns Fl C .
+Otherwise, the new object's project ID will be zero.
+An object's project ID can be changed at any time by the owner
+(or privileged user) via
+.Nm chattr Fl p Ar prjid
+or
+.Nm zfs Cm project Fl p Ar prjid .
+.Pp
+This feature will become
+.Sy active
+as soon as it is enabled and will never return to being
+.Sy disabled .
+\*[remount-upgrade]
+.
+.feature com.delphix redaction_bookmarks no bookmarks extensible_dataset
+This feature enables the use of redacted
+.Nm zfs Cm send Ns s ,
+which create redaction bookmarks storing the list of blocks
+redacted by the send that created them.
+For more information about redacted sends, see
+.Xr zfs-send 8 .
+.
+.feature com.delphix redacted_datasets no extensible_dataset
+This feature enables the receiving of redacted
+.Nm zfs Cm send
+streams, which create redacted datasets when received.
+These datasets are missing some of their blocks,
+and so cannot be safely mounted, and their contents cannot be safely read.
+For more information about redacted receives, see
+.Xr zfs-send 8 .
+.
+.feature com.datto resilver_defer yes
+This feature allows ZFS to postpone new resilvers if an existing one is already
+in progress.
+Without this feature, any new resilvers will cause the currently
+running one to be immediately restarted from the beginning.
+.Pp
+This feature becomes
+.Sy active
+once a resilver has been deferred, and returns to being
+.Sy enabled
+when the deferred resilver begins.
+.
+.feature org.illumos sha512 no extensible_dataset
+This feature enables the use of the SHA-512/256 truncated hash algorithm
+(FIPS 180-4) for checksum and dedup.
+The native 64-bit arithmetic of SHA-512 provides an approximate 50%
+performance boost over SHA-256 on 64-bit hardware
+and is thus a good minimum-change replacement candidate
+for systems where hash performance is important,
+but these systems cannot for whatever reason utilize the faster
+.Sy skein No and Sy edonr
+algorithms.
+.Pp
+.checksum-spiel sha512
+.
+.feature org.illumos skein no extensible_dataset
+This feature enables the use of the Skein hash algorithm for checksum and dedup.
+Skein is a high-performance secure hash algorithm that was a
+finalist in the NIST SHA-3 competition.
+It provides a very high security margin and high performance on 64-bit hardware
+(80% faster than SHA-256).
+This implementation also utilizes the new salted checksumming
+functionality in ZFS, which means that the checksum is pre-seeded with a
+secret 256-bit random key (stored on the pool) before being fed the data
+block to be checksummed.
+Thus the produced checksums are unique to a given pool,
+preventing hash collision attacks on systems with dedup.
+.Pp
+.checksum-spiel skein
+.
+.feature com.delphix spacemap_histogram yes
+This feature allows ZFS to maintain more information about how free space
+is organized within the pool.
+If this feature is
+.Sy enabled ,
+it will be activated when a new space map object is created, or
+an existing space map is upgraded to the new format,
+and never returns back to being
+.Sy enabled .
+.
+.feature com.delphix spacemap_v2 yes
+This feature enables the use of the new space map encoding which
+consists of two words (instead of one) whenever it is advantageous.
+The new encoding allows space maps to represent large regions of
+space more efficiently on-disk while also increasing their maximum
+addressable offset.
+.Pp
+This feature becomes
+.Sy active
+once it is
+.Sy enabled ,
+and never returns back to being
+.Sy enabled .
+.
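+.Pp
+The state of this
+.Pq or any other
+feature on a given pool can be queried via the corresponding pool property;
+for example, on an illustrative pool
+.Ar tank :
+.Dl # Nm zpool Cm get Sy feature Ns @ Ns Sy spacemap_v2 Ar tank
+.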
+.feature org.zfsonlinux userobj_accounting yes extensible_dataset +This feature allows administrators to account the object usage information +by user and group. +.Pp +\*[instant-never] +\*[remount-upgrade] +. +.feature com.delphix zpool_checkpoint yes +This feature enables the +.Nm zpool Cm checkpoint +command that can checkpoint the state of the pool +at the time it was issued and later rewind back to it or discard it. +.Pp +This feature becomes +.Sy active +when the +.Nm zpool Cm checkpoint +command is used to checkpoint the pool. +The feature will only return back to being +.Sy enabled +when the pool is rewound or the checkpoint has been discarded. +. +.feature org.freebsd zstd_compress no extensible_dataset +.Sy zstd +is a high-performance compression algorithm that features a +combination of high compression ratios and high speed. +Compared to +.Sy gzip , +.Sy zstd +offers slightly better compression at much higher speeds. +Compared to +.Sy lz4 , +.Sy zstd +offers much better compression while being only modestly slower. +Typically, +.Sy zstd +compression speed ranges from 250 to 500 MB/s per thread +and decompression speed is over 1 GB/s per thread. +.Pp +When the +.Sy zstd +feature is set to +.Sy enabled , +the administrator can turn on +.Sy zstd +compression of any dataset using +.Nm zfs Cm set Sy compress Ns = Ns Sy zstd Ar dset +.Po see Xr zfs-set 8 Pc . +This feature becomes +.Sy active +once a +.Sy compress +property has been set to +.Sy zstd , +and will return to being +.Sy enabled +once all filesystems that have ever had their +.Sy compress +property set to +.Sy zstd +are destroyed. +.El +. +.Sh SEE ALSO +.Xr zpool 8 diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7 new file mode 100644 index 000000000000..58132baf5025 --- /dev/null +++ b/man/man7/zpoolconcepts.7 @@ -0,0 +1,512 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. +.\" Copyright (c) 2017 Datto Inc. +.\" Copyright (c) 2018 George Melikov. All Rights Reserved. +.\" Copyright 2017 Nexenta Systems, Inc. +.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. +.\" +.Dd June 2, 2021 +.Dt ZPOOLCONCEPTS 7 +.Os +. +.Sh NAME +.Nm zpoolconcepts +.Nd overview of ZFS storage pools +. +.Sh DESCRIPTION +.Ss Virtual Devices (vdevs) +A "virtual device" describes a single device or a collection of devices +organized according to certain performance and fault characteristics. +The following virtual devices are supported: +.Bl -tag -width "special" +.It Sy disk +A block device, typically located under +.Pa /dev . 
+ZFS can use individual slices or partitions, though the recommended mode of
+operation is to use whole disks.
+A disk can be specified by a full path, or it can be a shorthand name
+.Po the relative portion of the path under
+.Pa /dev
+.Pc .
+A whole disk can be specified by omitting the slice or partition designation.
+For example,
+.Pa sda
+is equivalent to
+.Pa /dev/sda .
+When given a whole disk, ZFS automatically labels the disk, if necessary.
+.It Sy file
+A regular file.
+The use of files as a backing store is strongly discouraged.
+It is designed primarily for experimental purposes, as the fault tolerance of a
+file is only as good as the file system on which it resides.
+A file must be specified by a full path.
+.It Sy mirror
+A mirror of two or more devices.
+Data is replicated in an identical fashion across all components of a mirror.
+A mirror with
+.Em N No disks of size Em X No can hold Em X No bytes and can withstand Em N-1
+devices failing without losing data.
+.It Sy raidz , raidz1 , raidz2 , raidz3
+A variation on RAID-5 that allows for better distribution of parity and
+eliminates the RAID-5
+.Qq write hole
+.Pq in which data and parity become inconsistent after a power loss .
+Data and parity are striped across all disks within a raidz group.
+.Pp
+A raidz group can have single, double, or triple parity, meaning that the
+raidz group can sustain one, two, or three failures, respectively, without
+losing any data.
+The
+.Sy raidz1
+vdev type specifies a single-parity raidz group; the
+.Sy raidz2
+vdev type specifies a double-parity raidz group; and the
+.Sy raidz3
+vdev type specifies a triple-parity raidz group.
+The
+.Sy raidz
+vdev type is an alias for
+.Sy raidz1 .
+.Pp
+A raidz group with
+.Em N No disks of size Em X No with Em P No parity disks can hold approximately
+.Em (N-P)*X No bytes and can withstand Em P No devices failing without losing data.
+The minimum number of devices in a raidz group is one more than the number of
+parity disks.
+The recommended number is between 3 and 9 to help increase performance.
+.It Sy draid , draid1 , draid2 , draid3
+A variant of raidz that provides integrated distributed hot spares which
+allow for faster resilvering while retaining the benefits of raidz.
+A dRAID vdev is constructed from multiple internal raidz groups, each with
+.Em D No data devices and Em P No parity devices.
+These groups are distributed over all of the children in order to fully
+utilize the available disk performance.
+.Pp
+Unlike raidz, dRAID uses a fixed stripe width (padding as necessary with
+zeros) to allow fully sequential resilvering.
+This fixed stripe width significantly affects both usable capacity and IOPS.
+For example, with the default
+.Em D=8 No and Em 4kB No disk sectors the minimum allocation size is Em 32kB .
+If using compression, this relatively large allocation size can reduce the
+effective compression ratio.
+When using ZFS volumes and dRAID, the default of the
+.Sy volblocksize
+property is increased to account for the allocation size.
+If a dRAID pool will hold a significant amount of small blocks, it is
+recommended to also add a mirrored
+.Sy special
+vdev to store those blocks.
+.Pp
+In regard to I/O, performance is similar to raidz since for any read all
+.Em D No data disks must be accessed.
+Delivered random IOPS can be reasonably approximated as
+.Sy floor((N-S)/(D+P))*single_drive_IOPS .
+.Pp
+Like raidz, a dRAID can have single-, double-, or triple-parity.
+The +.Sy draid1 , +.Sy draid2 , +and +.Sy draid3 +types can be used to specify the parity level. +The +.Sy draid +vdev type is an alias for +.Sy draid1 . +.Pp +A dRAID with +.Em N No disks of size Em X , D No data disks per redundancy group, Em P +.No parity level, and Em S No distributed hot spares can hold approximately +.Em (N-S)*(D/(D+P))*X No bytes and can withstand Em P +devices failing without losing data. +.It Sy draid Ns Oo Ar parity Oc Ns Oo Sy \&: Ns Ar data Ns Sy d Oc Ns Oo Sy \&: Ns Ar children Ns Sy c Oc Ns Oo Sy \&: Ns Ar spares Ns Sy s Oc +A non-default dRAID configuration can be specified by appending one or more +of the following optional arguments to the +.Sy draid +keyword: +.Bl -tag -compact -width "children" +.It Ar parity +The parity level (1-3). +.It Ar data +The number of data devices per redundancy group. +In general, a smaller value of +.Em D No will increase IOPS, improve the compression ratio, +and speed up resilvering at the expense of total usable capacity. +Defaults to +.Em 8 , No unless Em N-P-S No is less than Em 8 . +.It Ar children +The expected number of children. +Useful as a cross-check when listing a large number of devices. +An error is returned when the provided number of children differs. +.It Ar spares +The number of distributed hot spares. +Defaults to zero. +.El +.It Sy spare +A pseudo-vdev which keeps track of available hot spares for a pool. +For more information, see the +.Sx Hot Spares +section. +.It Sy log +A separate intent log device. +If more than one log device is specified, then writes are load-balanced between +devices. +Log devices can be mirrored. +However, raidz vdev types are not supported for the intent log. +For more information, see the +.Sx Intent Log +section. +.It Sy dedup +A device dedicated solely for deduplication tables. +The redundancy of this device should match the redundancy of the other normal +devices in the pool. +If more than one dedup device is specified, then +allocations are load-balanced between those devices. +.It Sy special +A device dedicated solely for allocating various kinds of internal metadata, +and optionally small file blocks. +The redundancy of this device should match the redundancy of the other normal +devices in the pool. +If more than one special device is specified, then +allocations are load-balanced between those devices. +.Pp +For more information on special allocations, see the +.Sx Special Allocation Class +section. +.It Sy cache +A device used to cache storage pool data. +A cache device cannot be configured as a mirror or raidz group. +For more information, see the +.Sx Cache Devices +section. +.El +.Pp +Virtual devices cannot be nested, so a mirror or raidz virtual device can only +contain files or disks. +Mirrors of mirrors +.Pq or other combinations +are not allowed. +.Pp +A pool can have any number of virtual devices at the top of the configuration +.Po known as +.Qq root vdevs +.Pc . +Data is dynamically distributed across all top-level devices to balance data +among devices. +As new virtual devices are added, ZFS automatically places data on the newly +available devices. +.Pp +Virtual devices are specified one at a time on the command line, +separated by whitespace. +Keywords like +.Sy mirror No and Sy raidz +are used to distinguish where a group ends and another begins. +For example, the following creates a pool with two root vdevs, +each a mirror of two disks: +.Dl # Nm zpool Cm create Ar mypool Sy mirror Ar sda sdb Sy mirror Ar sdc sdd +. 
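+.Pp
+Similarly, a non-default dRAID configuration as described above
+.Po double parity, 4 data disks per group, 7 children, and 1 distributed spare;
+device names are illustrative
+.Pc
+can be created with:
+.Dl # Nm zpool Cm create Ar mypool Sy draid2:4d:7c:1s Ar sda sdb sdc sdd sde sdf sdg
+.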
+.Ss Device Failure and Recovery +ZFS supports a rich set of mechanisms for handling device failure and data +corruption. +All metadata and data is checksummed, and ZFS automatically repairs bad data +from a good copy when corruption is detected. +.Pp +In order to take advantage of these features, a pool must make use of some form +of redundancy, using either mirrored or raidz groups. +While ZFS supports running in a non-redundant configuration, where each root +vdev is simply a disk or file, this is strongly discouraged. +A single case of bit corruption can render some or all of your data unavailable. +.Pp +A pool's health status is described by one of three states: +.Sy online , degraded , No or Sy faulted . +An online pool has all devices operating normally. +A degraded pool is one in which one or more devices have failed, but the data is +still available due to a redundant configuration. +A faulted pool has corrupted metadata, or one or more faulted devices, and +insufficient replicas to continue functioning. +.Pp +The health of the top-level vdev, such as a mirror or raidz device, +is potentially impacted by the state of its associated vdevs, +or component devices. +A top-level vdev or component device is in one of the following states: +.Bl -tag -width "DEGRADED" +.It Sy DEGRADED +One or more top-level vdevs is in the degraded state because one or more +component devices are offline. +Sufficient replicas exist to continue functioning. +.Pp +One or more component devices is in the degraded or faulted state, but +sufficient replicas exist to continue functioning. +The underlying conditions are as follows: +.Bl -bullet -compact +.It +The number of checksum errors exceeds acceptable levels and the device is +degraded as an indication that something may be wrong. +ZFS continues to use the device as necessary. +.It +The number of I/O errors exceeds acceptable levels. +The device could not be marked as faulted because there are insufficient +replicas to continue functioning. +.El +.It Sy FAULTED +One or more top-level vdevs is in the faulted state because one or more +component devices are offline. +Insufficient replicas exist to continue functioning. +.Pp +One or more component devices is in the faulted state, and insufficient +replicas exist to continue functioning. +The underlying conditions are as follows: +.Bl -bullet -compact +.It +The device could be opened, but the contents did not match expected values. +.It +The number of I/O errors exceeds acceptable levels and the device is faulted to +prevent further use of the device. +.El +.It Sy OFFLINE +The device was explicitly taken offline by the +.Nm zpool Cm offline +command. +.It Sy ONLINE +The device is online and functioning. +.It Sy REMOVED +The device was physically removed while the system was running. +Device removal detection is hardware-dependent and may not be supported on all +platforms. +.It Sy UNAVAIL +The device could not be opened. +If a pool is imported when a device was unavailable, then the device will be +identified by a unique identifier instead of its path since the path was never +correct in the first place. +.El +.Pp +Checksum errors represent events where a disk returned data that was expected +to be correct, but was not. +In other words, these are instances of silent data corruption. +The checksum errors are reported in +.Nm zpool Cm status +and +.Nm zpool Cm events . +When a block is stored redundantly, a damaged block may be reconstructed +(e.g. from raidz parity or a mirrored copy). 
+In this case, ZFS reports the checksum error against the disks that contained
+damaged data.
+If a block is unable to be reconstructed (e.g. due to 3 disks being damaged
+in a raidz2 group), it is not possible to determine which disks were silently
+corrupted.
+In this case, checksum errors are reported for all disks on which the block
+is stored.
+.Pp
+If a device is removed and later re-attached to the system,
+ZFS attempts to online the device automatically.
+Device attachment detection is hardware-dependent
+and might not be supported on all platforms.
+.
+.Ss Hot Spares
+ZFS allows devices to be associated with pools as
+.Qq hot spares .
+These devices are not actively used in the pool, but when an active device
+fails, it is automatically replaced by a hot spare.
+To create a pool with hot spares, specify a
+.Sy spare
+vdev with any number of devices.
+For example,
+.Dl # Nm zpool Cm create Ar pool Sy mirror Ar sda sdb Sy spare Ar sdc sdd
+.Pp
+Spares can be shared across multiple pools, and can be added with the
+.Nm zpool Cm add
+command and removed with the
+.Nm zpool Cm remove
+command.
+Once a spare replacement is initiated, a new
+.Sy spare
+vdev is created within the configuration that will remain there until the
+original device is replaced.
+At this point, the hot spare becomes available again if another device fails.
+.Pp
+If a pool has a shared spare that is currently being used, the pool cannot be
+exported since other pools may use this shared spare, which may lead to
+potential data corruption.
+.Pp
+Shared spares add some risk.
+If the pools are imported on different hosts,
+and both pools suffer a device failure at the same time,
+both could attempt to use the spare at the same time.
+This may not be detected, resulting in data corruption.
+.Pp
+An in-progress spare replacement can be cancelled by detaching the hot spare.
+If the original faulted device is detached, then the hot spare assumes its
+place in the configuration, and is removed from the spare list of all active
+pools.
+.Pp
+The
+.Sy draid
+vdev type provides distributed hot spares.
+These hot spares are named after the dRAID vdev they're a part of
+.Po Sy draid1 Ns - Ns Ar 2 Ns - Ns Ar 3 No specifies spare Ar 3 No of vdev Ar 2 ,
+.No which is a single parity dRAID Pc
+and may only be used by that dRAID vdev.
+Otherwise, they behave the same as normal hot spares.
+.Pp
+Spares cannot replace log devices.
+.
+.Ss Intent Log
+The ZFS Intent Log (ZIL) satisfies POSIX requirements for synchronous
+transactions.
+For instance, databases often require their transactions to be on stable storage
+devices when returning from a system call.
+NFS and other applications can also use
+.Xr fsync 2
+to ensure data stability.
+By default, the intent log is allocated from blocks within the main pool.
+However, it might be possible to get better performance using separate intent
+log devices such as NVRAM or a dedicated disk.
+For example:
+.Dl # Nm zpool Cm create Ar pool sda sdb Sy log Ar sdc
+.Pp
+Multiple log devices can also be specified, and they can be mirrored.
+See the
+.Sx EXAMPLES
+section for an example of mirroring multiple log devices.
+.Pp
+Log devices can be added, replaced, attached, detached and removed.
+In addition, log devices are imported and exported as part of the pool
+that contains them.
+Mirrored devices can be removed by specifying the top-level mirror vdev.
+.
+.Ss Cache Devices
+Devices can be added to a storage pool as
+.Qq cache devices .
+These devices provide an additional layer of caching between main memory and +disk. +For read-heavy workloads, where the working set size is much larger than what +can be cached in main memory, using cache devices allows much more of this +working set to be served from low latency media. +Using cache devices provides the greatest performance improvement for random +read-workloads of mostly static content. +.Pp +To create a pool with cache devices, specify a +.Sy cache +vdev with any number of devices. +For example: +.Dl # Nm zpool Cm create Ar pool sda sdb Sy cache Ar sdc sdd +.Pp +Cache devices cannot be mirrored or part of a raidz configuration. +If a read error is encountered on a cache device, that read I/O is reissued to +the original storage pool device, which might be part of a mirrored or raidz +configuration. +.Pp +The content of the cache devices is persistent across reboots and restored +asynchronously when importing the pool in L2ARC (persistent L2ARC). +This can be disabled by setting +.Sy l2arc_rebuild_enabled Ns = Ns Sy 0 . +For cache devices smaller than +.Em 1GB , +we do not write the metadata structures +required for rebuilding the L2ARC in order not to waste space. +This can be changed with +.Sy l2arc_rebuild_blocks_min_l2size . +The cache device header +.Pq Em 512B +is updated even if no metadata structures are written. +Setting +.Sy l2arc_headroom Ns = Ns Sy 0 +will result in scanning the full-length ARC lists for cacheable content to be +written in L2ARC (persistent ARC). +If a cache device is added with +.Nm zpool Cm add +its label and header will be overwritten and its contents are not going to be +restored in L2ARC, even if the device was previously part of the pool. +If a cache device is onlined with +.Nm zpool Cm online +its contents will be restored in L2ARC. +This is useful in case of memory pressure +where the contents of the cache device are not fully restored in L2ARC. +The user can off- and online the cache device when there is less memory pressure +in order to fully restore its contents to L2ARC. +. +.Ss Pool checkpoint +Before starting critical procedures that include destructive actions +.Pq like Nm zfs Cm destroy , +an administrator can checkpoint the pool's state and in the case of a +mistake or failure, rewind the entire pool back to the checkpoint. +Otherwise, the checkpoint can be discarded when the procedure has completed +successfully. +.Pp +A pool checkpoint can be thought of as a pool-wide snapshot and should be used +with care as it contains every part of the pool's state, from properties to vdev +configuration. +Thus, certain operations are not allowed while a pool has a checkpoint. +Specifically, vdev removal/attach/detach, mirror splitting, and +changing the pool's GUID. +Adding a new vdev is supported, but in the case of a rewind it will have to be +added again. +Finally, users of this feature should keep in mind that scrubs in a pool that +has a checkpoint do not repair checkpointed data. 
+.Pp +To create a checkpoint for a pool: +.Dl # Nm zpool Cm checkpoint Ar pool +.Pp +To later rewind to its checkpointed state, you need to first export it and +then rewind it during import: +.Dl # Nm zpool Cm export Ar pool +.Dl # Nm zpool Cm import Fl -rewind-to-checkpoint Ar pool +.Pp +To discard the checkpoint from a pool: +.Dl # Nm zpool Cm checkpoint Fl d Ar pool +.Pp +Dataset reservations (controlled by the +.Sy reservation No and Sy refreservation +properties) may be unenforceable while a checkpoint exists, because the +checkpoint is allowed to consume the dataset's reservation. +Finally, data that is part of the checkpoint but has been freed in the +current state of the pool won't be scanned during a scrub. +. +.Ss Special Allocation Class +Allocations in the special class are dedicated to specific block types. +By default this includes all metadata, the indirect blocks of user data, and +any deduplication tables. +The class can also be provisioned to accept small file blocks. +.Pp +A pool must always have at least one normal +.Pq non- Ns Sy dedup Ns /- Ns Sy special +vdev before +other devices can be assigned to the special class. +If the +.Sy special +class becomes full, then allocations intended for it +will spill back into the normal class. +.Pp +Deduplication tables can be excluded from the special class by unsetting the +.Sy zfs_ddt_data_is_special +ZFS module parameter. +.Pp +Inclusion of small file blocks in the special class is opt-in. +Each dataset can control the size of small file blocks allowed +in the special class by setting the +.Sy special_small_blocks +property to nonzero. +See +.Xr zfsprops 7 +for more info on this property. diff --git a/man/man7/zpoolprops.7 b/man/man7/zpoolprops.7 new file mode 100644 index 000000000000..513f02e0314f --- /dev/null +++ b/man/man7/zpoolprops.7 @@ -0,0 +1,412 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. +.\" Copyright (c) 2017 Datto Inc. +.\" Copyright (c) 2018 George Melikov. All Rights Reserved. +.\" Copyright 2017 Nexenta Systems, Inc. +.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. +.\" Copyright (c) 2021, Colm Buckley +.\" +.Dd May 27, 2021 +.Dt ZPOOLPROPS 7 +.Os +. +.Sh NAME +.Nm zpoolprops +.Nd properties of ZFS storage pools +. +.Sh DESCRIPTION +Each pool has several properties associated with it. +Some properties are read-only statistics while others are configurable and +change the behavior of the pool. 
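+Pool properties are retrieved with
+.Nm zpool Cm get
+and changed with
+.Nm zpool Cm set ;
+for example, on an illustrative pool
+.Ar tank :
+.Dl # Nm zpool Cm get Sy capacity Ar tank
+.Dl # Nm zpool Cm set Sy autotrim Ns = Ns Sy on Ar tank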
+.Pp
+The following are read-only properties:
+.Bl -tag -width "unsupported@guid"
+.It Sy allocated
+Amount of storage used within the pool.
+See
+.Sy fragmentation
+and
+.Sy free
+for more information.
+.It Sy capacity
+Percentage of pool space used.
+This property can also be referred to by its shortened column name,
+.Sy cap .
+.It Sy expandsize
+Amount of uninitialized space within the pool or device that can be used to
+increase the total capacity of the pool.
+On whole-disk vdevs, this is the space beyond the end of the GPT –
+typically occurring when a LUN is dynamically expanded
+or a disk replaced with a larger one.
+On partition vdevs, this is the space appended to the partition after it was
+added to the pool – most likely by resizing it in-place.
+The space can be claimed for the pool by bringing it online with
+.Sy autoexpand Ns = Ns Sy on
+or using
+.Nm zpool Cm online Fl e .
+.It Sy fragmentation
+The amount of fragmentation in the pool.
+As the amount of space
+.Sy allocated
+increases, it becomes more difficult to locate
+.Sy free
+space.
+This may result in lower write performance compared to pools with more
+unfragmented free space.
+.It Sy free
+The amount of free space available in the pool.
+By contrast, the
+.Xr zfs 8
+.Sy available
+property describes how much new data can be written to ZFS filesystems/volumes.
+The zpool
+.Sy free
+property is not generally useful for this purpose, and can be substantially more than the zfs
+.Sy available
+space.
+This discrepancy is due to several factors, including raidz parity;
+zfs reservation, quota, refreservation, and refquota properties; and space set aside by
+.Sy spa_slop_shift
+(see
+.Xr zfs 4
+for more information).
+.It Sy freeing
+After a file system or snapshot is destroyed, the space it was using is
+returned to the pool asynchronously.
+.Sy freeing
+is the amount of space remaining to be reclaimed.
+Over time
+.Sy freeing
+will decrease while
+.Sy free
+increases.
+.It Sy health
+The current health of the pool.
+Health can be one of
+.Sy ONLINE , DEGRADED , FAULTED , OFFLINE , REMOVED , UNAVAIL .
+.It Sy guid
+A unique identifier for the pool.
+.It Sy load_guid
+A unique identifier for the pool.
+Unlike the
+.Sy guid
+property, this identifier is generated every time we load the pool (i.e. does
+not persist across imports/exports) and never changes while the pool is loaded
+(even if a
+.Sy reguid
+operation takes place).
+.It Sy size
+Total size of the storage pool.
+.It Sy unsupported@ Ns Em guid
+Information about unsupported features that are enabled on the pool.
+See
+.Xr zpool-features 7
+for details.
+.El
+.Pp
+The space usage properties report actual physical space available to the
+storage pool.
+The physical space can be different from the total amount of space that any
+contained datasets can actually use.
+The amount of space used in a raidz configuration depends on the characteristics
+of the data being written.
+In addition, ZFS reserves some space for internal accounting that the
+.Xr zfs 8
+command takes into account, but the
+.Nm
+command does not.
+For non-full pools of a reasonable size, these effects should be invisible.
+For small pools, or pools that are close to being completely full, these
+discrepancies may become more noticeable.
+.Pp
+The following property can be set at creation time and import time:
+.Bl -tag -width Ds
+.It Sy altroot
+Alternate root directory.
+If set, this directory is prepended to any mount points within the pool.
+This can be used when examining an unknown pool where the mount points cannot be
+trusted, or in an alternate boot environment, where the typical paths are not
+valid.
+.Sy altroot
+is not a persistent property.
+It is valid only while the system is up.
+Setting
+.Sy altroot
+defaults to using
+.Sy cachefile Ns = Ns Sy none ,
+though this may be overridden using an explicit setting.
+.El
+.Pp
+The following property can be set only at import time:
+.Bl -tag -width Ds
+.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off
+If set to
+.Sy on ,
+the pool will be imported in read-only mode.
+This property can also be referred to by its shortened column name,
+.Sy rdonly .
+.El
+.Pp
+The following properties can be set at creation time and import time, and later
+changed with the
+.Nm zpool Cm set
+command:
+.Bl -tag -width Ds
+.It Sy ashift Ns = Ns Sy ashift
+Pool sector size exponent, to the power of
+.Sy 2
+(internally referred to as
+.Sy ashift ) .
+Values from 9 to 16, inclusive, are valid; also, the
+value 0 (the default) means to auto-detect using the kernel's block
+layer and a ZFS internal exception list.
+I/O operations will be aligned to the specified size boundaries.
+Additionally, the minimum (disk)
+write size will be set to the specified size, so this represents a
+space vs. performance trade-off.
+For optimal performance, the pool sector size should be greater than
+or equal to the sector size of the underlying disks.
+The typical case for setting this property is when
+performance is important and the underlying disks use 4KiB sectors but
+report 512B sectors to the OS (for compatibility reasons); in that
+case, set
+.Sy ashift Ns = Ns Sy 12
+(which is
+.Sy 1<<12 No = Sy 4096 ) .
+When set, this property is
+used as the default hint value in subsequent vdev operations (add,
+attach and replace).
+Changing this value will not modify any existing
+vdev, not even on disk replacement; however, it can be used, for
+instance, to replace a dying 512B-sector disk with a newer 4KiB-sector
+device: this will probably result in bad performance but at the
+same time could prevent loss of data.
+.It Sy autoexpand Ns = Ns Sy on Ns | Ns Sy off
+Controls automatic pool expansion when the underlying LUN is grown.
+If set to
+.Sy on ,
+the pool will be resized according to the size of the expanded device.
+If the device is part of a mirror or raidz then all devices within that
+mirror/raidz group must be expanded before the new space is made available to
+the pool.
+The default behavior is
+.Sy off .
+This property can also be referred to by its shortened column name,
+.Sy expand .
+.It Sy autoreplace Ns = Ns Sy on Ns | Ns Sy off
+Controls automatic device replacement.
+If set to
+.Sy off ,
+device replacement must be initiated by the administrator by using the
+.Nm zpool Cm replace
+command.
+If set to
+.Sy on ,
+any new device, found in the same physical location as a device that previously
+belonged to the pool, is automatically formatted and replaced.
+The default behavior is
+.Sy off .
+This property can also be referred to by its shortened column name,
+.Sy replace .
+Autoreplace can also be used with virtual disks (like device
+mapper) provided that you use the /dev/disk/by-vdev paths set up by
+vdev_id.conf.
+See the
+.Xr vdev_id 8
+manual page for more details.
+Autoreplace and autoonline require the ZFS Event Daemon to be configured and
+running.
+See the
+.Xr zed 8
+manual page for more details.
+.It Sy autotrim Ns = Ns Sy on Ns | Ns Sy off
+When set to
+.Sy on ,
+space which has been recently freed, and is no longer allocated by the pool,
+will be periodically trimmed.
+This allows block device vdevs which support
+BLKDISCARD, such as SSDs, or file vdevs on which the underlying file system
+supports hole-punching, to reclaim unused blocks.
+The default value for this property is
+.Sy off .
+.Pp
+Automatic TRIM does not immediately reclaim blocks after a free.
+Instead, it will optimistically delay allowing smaller ranges to be aggregated
+into a few larger ones.
+These can then be issued more efficiently to the storage.
+TRIM on L2ARC devices is enabled by setting
+.Sy l2arc_trim_ahead > 0 .
+.Pp
+Be aware that automatic trimming of recently freed data blocks can put
+significant stress on the underlying storage devices.
+This will vary depending on how well the specific device handles these commands.
+For lower-end devices it is often possible to achieve most of the benefits
+of automatic trimming by running an on-demand (manual) TRIM periodically
+using the
+.Nm zpool Cm trim
+command.
+.It Sy bootfs Ns = Ns Sy (unset) Ns | Ns Ar pool Ns Op / Ns Ar dataset
+Identifies the default bootable dataset for the root pool.
+This property is expected to be set mainly by the installation and upgrade programs.
+Not all Linux distribution boot processes use the bootfs property.
+.It Sy cachefile Ns = Ns Ar path Ns | Ns Sy none
+Controls where the pool configuration is cached.
+Discovering all pools on system startup requires a cached copy of the
+configuration data that is stored on the root file system.
+All pools in this cache are automatically imported when the system boots.
+Some environments, such as install and clustering, need to cache this
+information in a different location so that pools are not automatically
+imported.
+Setting this property caches the pool configuration in a different location that
+can later be imported with
+.Nm zpool Cm import Fl c .
+Setting it to the value
+.Sy none
+creates a temporary pool that is never cached, and the
+.Qq
+.Pq empty string
+uses the default location.
+.Pp
+Multiple pools can share the same cache file.
+Because the kernel destroys and recreates this file when pools are added and
+removed, care should be taken when attempting to access this file.
+When the last pool using a
+.Sy cachefile
+is exported or destroyed, the file will be empty.
+.It Sy comment Ns = Ns Ar text
+A text string consisting of printable ASCII characters that will be stored
+such that it is available even if the pool becomes faulted.
+An administrator can provide additional information about a pool using this
+property.
+.It Sy compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns …
+Specifies that the pool maintain compatibility with specific feature sets.
+When set to
+.Sy off
+(or unset) compatibility is disabled (all features may be enabled); when set to
+.Sy legacy
+no features may be enabled.
+When set to a comma-separated list of filenames
+(each filename may either be an absolute path, or relative to
+.Pa /etc/zfs/compatibility.d
+or
+.Pa /usr/share/zfs/compatibility.d )
+the lists of requested features are read from those files, separated by
+whitespace and/or commas.
+Only features present in all files may be enabled.
+.Pp
+See
+.Xr zpool-features 7 ,
+.Xr zpool-create 8
+and
+.Xr zpool-upgrade 8
+for more information on the operation of compatibility feature sets.
+.It Sy dedupditto Ns = Ns Ar number
+This property is deprecated and no longer has any effect.
+.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off
+Controls whether a non-privileged user is granted access based on the dataset
+permissions defined on the dataset.
+See
+.Xr zfs 8
+for more information on ZFS delegated administration.
+.It Sy failmode Ns = Ns Sy wait Ns | Ns Sy continue Ns | Ns Sy panic
+Controls the system behavior in the event of catastrophic pool failure.
+This condition is typically a result of a loss of connectivity to the underlying
+storage device(s) or a failure of all devices within the pool.
+The behavior of such an event is determined as follows:
+.Bl -tag -width "continue"
+.It Sy wait
+Blocks all I/O access until the device connectivity is recovered and the errors
+are cleared.
+This is the default behavior.
+.It Sy continue
+Returns
+.Er EIO
+to any new write I/O requests but allows reads to any of the remaining healthy
+devices.
+Any write requests that have yet to be committed to disk would be blocked.
+.It Sy panic
+Prints out a message to the console and generates a system crash dump.
+.El
+.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled
+The value of this property is the current state of
+.Ar feature_name .
+The only valid value when setting this property is
+.Sy enabled
+which moves
+.Ar feature_name
+to the enabled state.
+See
+.Xr zpool-features 7
+for details on feature states.
+.It Sy listsnapshots Ns = Ns Sy on Ns | Ns Sy off
+Controls whether information about snapshots associated with this pool is
+output when
+.Nm zfs Cm list
+is run without the
+.Fl t
+option.
+The default value is
+.Sy off .
+This property can also be referred to by its shortened name,
+.Sy listsnaps .
+.It Sy multihost Ns = Ns Sy on Ns | Ns Sy off
+Controls whether a pool activity check should be performed during
+.Nm zpool Cm import .
+When a pool is determined to be active, it cannot be imported, even with the
+.Fl f
+option.
+This property is intended to be used in failover configurations
+where multiple hosts have access to a pool on shared storage.
+.Pp
+Multihost provides protection on import only.
+It does not protect against an
+individual device being used in multiple pools, regardless of the type of vdev.
+See the discussion under
+.Nm zpool Cm create .
+.Pp
+When this property is on, periodic writes to storage occur to show the pool is
+in use.
+See
+.Sy zfs_multihost_interval
+in the
+.Xr zfs 4
+manual page.
+In order to enable this property, each host must set a unique hostid.
+See
+.Xr genhostid 1 ,
+.Xr zgenhostid 8 , and
+.Xr spl 4
+for additional details.
+The default value is
+.Sy off .
+.It Sy version Ns = Ns Ar version
+The current on-disk version of the pool.
+This can be increased, but never decreased.
+The preferred method of updating pools is with the
+.Nm zpool Cm upgrade
+command, though this property can be used when a specific version is needed for
+backwards compatibility.
+Once feature flags are enabled on a pool, this property will no longer have a
+value.
+.El
diff --git a/man/man8/mount.zfs.8 b/man/man8/mount.zfs.8
index 5d36dbdb1692..2101f70cd595 100644
--- a/man/man8/mount.zfs.8
+++ b/man/man8/mount.zfs.8
@@ -56,7 +56,7 @@ in most cases.
 are handled according to the
 .Em Temporary Mount Point Properties
 section in
-.Xr zfsprops 8 ,
+.Xr zfsprops 7 ,
 except for those described below.
.Pp If diff --git a/man/man8/zed.8.in b/man/man8/zed.8.in index e0d9f04043dc..b0b26bfcf8e2 100644 --- a/man/man8/zed.8.in +++ b/man/man8/zed.8.in @@ -237,7 +237,6 @@ Terminate the daemon. .El . .Sh SEE ALSO -.Xr zfs-events 5 , .Xr zfs 8 , .Xr zpool 8 , .Xr zpool-events 8 diff --git a/man/man8/zfs-bookmark.8 b/man/man8/zfs-bookmark.8 index d8833c3fbc7d..094a7b30902f 100644 --- a/man/man8/zfs-bookmark.8 +++ b/man/man8/zfs-bookmark.8 @@ -56,7 +56,7 @@ a redaction bookmark. .Pp This feature must be enabled to be used. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on ZFS feature flags and the .Sy bookmarks feature. diff --git a/man/man8/zfs-clone.8 b/man/man8/zfs-clone.8 index 24784ecb9aa1..0640244f2009 100644 --- a/man/man8/zfs-clone.8 +++ b/man/man8/zfs-clone.8 @@ -47,7 +47,7 @@ See the .Sx Clones section of -.Xr zfsconcepts 8 +.Xr zfsconcepts 7 for details. The target dataset can be located anywhere in the ZFS hierarchy, and is created as the same type as the original. diff --git a/man/man8/zfs-create.8 b/man/man8/zfs-create.8 index 100a6deeda53..55397fa661d5 100644 --- a/man/man8/zfs-create.8 +++ b/man/man8/zfs-create.8 @@ -184,7 +184,7 @@ See in the .Em Native Properties section of -.Xr zfsprops 8 +.Xr zfsprops 7 for more information about sparse volumes. .It Fl n Do a dry-run diff --git a/man/man8/zfs-jail.8 b/man/man8/zfs-jail.8 index d4a04073edbc..4f9faaea9bf5 100644 --- a/man/man8/zfs-jail.8 +++ b/man/man8/zfs-jail.8 @@ -119,5 +119,5 @@ or name .Ar jailname . .El .Sh SEE ALSO -.Xr jail 8 , -.Xr zfsprops 8 +.Xr zfsprops 7 , +.Xr jail 8 diff --git a/man/man8/zfs-list.8 b/man/man8/zfs-list.8 index 0313b3a14ecb..5200483868ff 100644 --- a/man/man8/zfs-list.8 +++ b/man/man8/zfs-list.8 @@ -90,7 +90,7 @@ The property must be: One of the properties described in the .Sx Native Properties section of -.Xr zfsprops 8 +.Xr zfsprops 7 .It A user property .It @@ -118,7 +118,7 @@ value of the property. The property must be one of the properties described in the .Sx Properties section of -.Xr zfsprops 8 +.Xr zfsprops 7 or the value .Sy name to sort by the dataset name. @@ -158,5 +158,5 @@ displays only snapshots. .El . .Sh SEE ALSO -.Xr zfs-get 8 , -.Xr zfsprops 8 +.Xr zfsprops 7 , +.Xr zfs-get 8 diff --git a/man/man8/zfs-load-key.8 b/man/man8/zfs-load-key.8 index f29d3df824fd..ed89b65d7159 100644 --- a/man/man8/zfs-load-key.8 +++ b/man/man8/zfs-load-key.8 @@ -296,6 +296,6 @@ Deduplication with encryption will leak information about which blocks are equivalent in a dataset and will incur an extra CPU cost for each block written. . .Sh SEE ALSO +.Xr zfsprops 7 , .Xr zfs-create 8 , -.Xr zfs-set 8 , -.Xr zfsprops 8 +.Xr zfs-set 8 diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in index e4117101beb3..7aa332ba8174 100644 --- a/man/man8/zfs-mount-generator.8.in +++ b/man/man8/zfs-mount-generator.8.in @@ -186,7 +186,7 @@ to re-run all generators: .Xr systemd.mount 5 , .Xr systemd.target 5 , .Xr zfs 5 , -.Xr zfs-events 5 , .Xr systemd.generator 7 , .Xr systemd.special 7 , -.Xr zed 8 +.Xr zed 8 , +.Xr zpool-events 8 diff --git a/man/man8/zfs-mount.8 b/man/man8/zfs-mount.8 index 62275242c9f9..42ce6b5ca155 100644 --- a/man/man8/zfs-mount.8 +++ b/man/man8/zfs-mount.8 @@ -91,7 +91,7 @@ duration of the mount. See the .Em Temporary Mount Point Properties section of -.Xr zfsprops 8 +.Xr zfsprops 7 for details. .It Fl l Load keys for encrypted filesystems as they are being mounted. 
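As a usage sketch for the -l flag documented in the hunk above (the dataset
name is hypothetical), the key can be loaded and the filesystem mounted in
one step or two:

    # One step: load the encryption key, then mount.
    zfs mount -l tank/secure

    # Equivalent two-step form without -l:
    zfs load-key tank/secure
    zfs mount tank/secure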
diff --git a/man/man8/zfs-receive.8 b/man/man8/zfs-receive.8 index ceb6e64ce571..d2cec42a8e71 100644 --- a/man/man8/zfs-receive.8 +++ b/man/man8/zfs-receive.8 @@ -357,7 +357,7 @@ To use this flag, the storage pool must have the .Sy extensible_dataset feature enabled. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on ZFS feature flags. .It Fl u File system that is associated with the received stream is not mounted. diff --git a/man/man8/zfs-send.8 b/man/man8/zfs-send.8 index 47b6c47ad03e..a3d08fbf6e2c 100644 --- a/man/man8/zfs-send.8 +++ b/man/man8/zfs-send.8 @@ -110,7 +110,7 @@ The receiving system must have the .Sy large_blocks pool feature enabled as well. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on ZFS feature flags and the .Sy large_blocks feature. @@ -161,7 +161,7 @@ received as an encrypted dataset, since encrypted datasets cannot use the .Sy embedded_data feature. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on ZFS feature flags and the .Sy embedded_data feature. @@ -308,7 +308,7 @@ The receiving system must have the .Sy large_blocks pool feature enabled as well. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on ZFS feature flags and the .Sy large_blocks feature. @@ -372,7 +372,7 @@ since encrypted datasets cannot use the .Sy embedded_data feature. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on ZFS feature flags and the .Sy embedded_data feature. diff --git a/man/man8/zfs-set.8 b/man/man8/zfs-set.8 index 83709fa6149c..a3588cc26638 100644 --- a/man/man8/zfs-set.8 +++ b/man/man8/zfs-set.8 @@ -65,7 +65,7 @@ .Xc Only some properties can be edited. See -.Xr zfsprops 8 +.Xr zfsprops 7 for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form @@ -78,7 +78,7 @@ User properties can be set on snapshots. For more information, see the .Em User Properties section of -.Xr zfsprops 8 . +.Xr zfsprops 7 . .It Xo .Nm zfs .Cm get @@ -114,7 +114,7 @@ This command takes a comma-separated list of properties as described in the and .Sx User Properties sections of -.Xr zfsprops 8 . +.Xr zfsprops 7 . .Pp The value .Sy all @@ -163,7 +163,7 @@ restored to default if no ancestor has the property set, or with the .Fl S option reverted to the received value if one exists. See -.Xr zfsprops 8 +.Xr zfsprops 7 for a listing of default values, and details on which properties can be inherited. .Bl -tag -width "-r" @@ -178,5 +178,5 @@ option was not specified. .El . .Sh SEE ALSO -.Xr zfs-list 8 , -.Xr zfsprops 8 +.Xr zfsprops 7 , +.Xr zfs-list 8 diff --git a/man/man8/zfs-share.8 b/man/man8/zfs-share.8 index 369f667c9d02..e30d538814ca 100644 --- a/man/man8/zfs-share.8 +++ b/man/man8/zfs-share.8 @@ -87,4 +87,4 @@ The command can also be given a path to a ZFS file system shared on the system. .Sh SEE ALSO .Xr exports 5 , .Xr smb.conf 5 , -.Xr zfsprops 8 +.Xr zfsprops 7 diff --git a/man/man8/zfs-snapshot.8 b/man/man8/zfs-snapshot.8 index fdff39fbc4b8..225123f44b2b 100644 --- a/man/man8/zfs-snapshot.8 +++ b/man/man8/zfs-snapshot.8 @@ -54,7 +54,7 @@ can be used as an alias for See the .Sx Snapshots section of -.Xr zfsconcepts 8 +.Xr zfsconcepts 7 for details. .Bl -tag -width "-o" .It Fl o Ar property Ns = Ns Ar value diff --git a/man/man8/zfs-upgrade.8 b/man/man8/zfs-upgrade.8 index 0ba276dc6ad5..f3620faa6135 100644 --- a/man/man8/zfs-upgrade.8 +++ b/man/man8/zfs-upgrade.8 @@ -77,7 +77,7 @@ systems running older versions of ZFS. 
.Pp In general, the file system version is independent of the pool version. See -.Xr zpool-features 5 +.Xr zpool-features 7 for information on features of ZFS storage pools. .Pp In some cases, the file system version and the pool version are interrelated and diff --git a/man/man8/zfs-userspace.8 b/man/man8/zfs-userspace.8 index d09e35e1fdd8..b7bd61b5709a 100644 --- a/man/man8/zfs-userspace.8 +++ b/man/man8/zfs-userspace.8 @@ -183,5 +183,5 @@ for types. .El . .Sh SEE ALSO -.Xr zfs-set 8 , -.Xr zfsprops 8 +.Xr zfsprops 7 , +.Xr zfs-set 8 diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 16c874eb20de..fca1ba00da7d 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -96,7 +96,7 @@ or .El .Pp See -.Xr zfsconcepts 8 +.Xr zfsconcepts 7 for details. . .Ss Properties @@ -108,7 +108,7 @@ In addition, native properties are either editable or read-only. User properties have no effect on ZFS behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about properties, see -.Xr zfsprops 8 . +.Xr zfsprops 7 . . .Ss Encryption Enabling the @@ -354,7 +354,7 @@ Snapshots are displayed if The default is .Sy off . See -.Xr zpoolprops 8 +.Xr zpoolprops 7 for more information on pool properties. .Bd -literal -compact -offset Ds .No # Nm zfs Cm list @@ -728,6 +728,8 @@ This option is provided for backwards compatibility with older ZFS versions. .Xr acl 5 , .Xr attributes 5 , .Xr exports 5 , +.Xr zfsconcepts 7 , +.Xr zfsprops 7 , .Xr exportfs 8 , .Xr mount 8 , .Xr net 8 , @@ -768,6 +770,4 @@ This option is provided for backwards compatibility with older ZFS versions. .Xr zfs-upgrade 8 , .Xr zfs-userspace 8 , .Xr zfs-wait 8 , -.Xr zfsconcepts 8 , -.Xr zfsprops 8 , .Xr zpool 8 diff --git a/man/man8/zfsconcepts.8 b/man/man8/zfsconcepts.8 deleted file mode 100644 index 3403d53bf8e7..000000000000 --- a/man/man8/zfsconcepts.8 +++ /dev/null @@ -1,206 +0,0 @@ -.\" -.\" CDDL HEADER START -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" CDDL HEADER END -.\" -.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright 2011 Joshua M. Clulow -.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved. -.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. -.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. -.\" Copyright (c) 2014 by Adam Stevko. All rights reserved. -.\" Copyright (c) 2014 Integros [integros.com] -.\" Copyright 2019 Richard Laager. All rights reserved. -.\" Copyright 2018 Nexenta Systems, Inc. -.\" Copyright 2019 Joyent, Inc. -.\" -.Dd June 30, 2019 -.Dt ZFSCONCEPTS 8 -.Os -. -.Sh NAME -.Nm zfsconcepts -.Nd overview of ZFS concepts -. 
-.Sh DESCRIPTION -.Ss ZFS File System Hierarchy -A ZFS storage pool is a logical collection of devices that provide space for -datasets. -A storage pool is also the root of the ZFS file system hierarchy. -.Pp -The root of the pool can be accessed as a file system, such as mounting and -unmounting, taking snapshots, and setting properties. -The physical storage characteristics, however, are managed by the -.Xr zpool 8 -command. -.Pp -See -.Xr zpool 8 -for more information on creating and administering pools. -.Ss Snapshots -A snapshot is a read-only copy of a file system or volume. -Snapshots can be created extremely quickly, and initially consume no additional -space within the pool. -As data within the active dataset changes, the snapshot consumes more data than -would otherwise be shared with the active dataset. -.Pp -Snapshots can have arbitrary names. -Snapshots of volumes can be cloned or rolled back, visibility is determined -by the -.Sy snapdev -property of the parent volume. -.Pp -File system snapshots can be accessed under the -.Pa .zfs/snapshot -directory in the root of the file system. -Snapshots are automatically mounted on demand and may be unmounted at regular -intervals. -The visibility of the -.Pa .zfs -directory can be controlled by the -.Sy snapdir -property. -.Ss Bookmarks -A bookmark is like a snapshot, a read-only copy of a file system or volume. -Bookmarks can be created extremely quickly, compared to snapshots, and they -consume no additional space within the pool. -Bookmarks can also have arbitrary names, much like snapshots. -.Pp -Unlike snapshots, bookmarks can not be accessed through the filesystem in any way. -From a storage standpoint a bookmark just provides a way to reference -when a snapshot was created as a distinct object. -Bookmarks are initially tied to a snapshot, not the filesystem or volume, -and they will survive if the snapshot itself is destroyed. -Since they are very light weight there's little incentive to destroy them. -.Ss Clones -A clone is a writable volume or file system whose initial contents are the same -as another dataset. -As with snapshots, creating a clone is nearly instantaneous, and initially -consumes no additional space. -.Pp -Clones can only be created from a snapshot. -When a snapshot is cloned, it creates an implicit dependency between the parent -and child. -Even though the clone is created somewhere else in the dataset hierarchy, the -original snapshot cannot be destroyed as long as a clone exists. -The -.Sy origin -property exposes this dependency, and the -.Cm destroy -command lists any such dependencies, if they exist. -.Pp -The clone parent-child dependency relationship can be reversed by using the -.Cm promote -subcommand. -This causes the -.Qq origin -file system to become a clone of the specified file system, which makes it -possible to destroy the file system that the clone was created from. -.Ss "Mount Points" -Creating a ZFS file system is a simple operation, so the number of file systems -per system is likely to be numerous. -To cope with this, ZFS automatically manages mounting and unmounting file -systems without the need to edit the -.Pa /etc/fstab -file. -All automatically managed file systems are mounted by ZFS at boot time. -.Pp -By default, file systems are mounted under -.Pa /path , -where -.Ar path -is the name of the file system in the ZFS namespace. -Directories are created and destroyed as needed. -.Pp -A file system can also have a mount point set in the -.Sy mountpoint -property. 
-This directory is created as needed, and ZFS automatically mounts the file -system when the -.Nm zfs Cm mount Fl a -command is invoked -.Po without editing -.Pa /etc/fstab -.Pc . -The -.Sy mountpoint -property can be inherited, so if -.Em pool/home -has a mount point of -.Pa /export/stuff , -then -.Em pool/home/user -automatically inherits a mount point of -.Pa /export/stuff/user . -.Pp -A file system -.Sy mountpoint -property of -.Sy none -prevents the file system from being mounted. -.Pp -If needed, ZFS file systems can also be managed with traditional tools -.Po -.Nm mount , -.Nm umount , -.Pa /etc/fstab -.Pc . -If a file system's mount point is set to -.Sy legacy , -ZFS makes no attempt to manage the file system, and the administrator is -responsible for mounting and unmounting the file system. -Because pools must -be imported before a legacy mount can succeed, administrators should ensure -that legacy mounts are only attempted after the zpool import process -finishes at boot time. -For example, on machines using systemd, the mount option -.Pp -.Nm x-systemd.requires=zfs-import.target -.Pp -will ensure that the zfs-import completes before systemd attempts mounting -the filesystem. -See -.Xr systemd.mount 5 -for details. -.Ss Deduplication -Deduplication is the process for removing redundant data at the block level, -reducing the total amount of data stored. -If a file system has the -.Sy dedup -property enabled, duplicate data blocks are removed synchronously. -The result -is that only unique data is stored and common components are shared among files. -.Pp -Deduplicating data is a very resource-intensive operation. -It is generally recommended that you have at least 1.25 GiB of RAM -per 1 TiB of storage when you enable deduplication. -Calculating the exact requirement depends heavily -on the type of data stored in the pool. -.Pp -Enabling deduplication on an improperly-designed system can result in -performance issues (slow IO and administrative operations). -It can potentially lead to problems importing a pool due to memory exhaustion. -Deduplication can consume significant processing power (CPU) and memory as well -as generate additional disk IO. -.Pp -Before creating a pool with deduplication enabled, ensure that you have planned -your hardware requirements appropriately and implemented appropriate recovery -practices, such as regular backups. -Consider using the -.Sy compression -property as a less resource-intensive alternative. diff --git a/man/man8/zfsprops.8 b/man/man8/zfsprops.8 deleted file mode 100644 index 1b985c98e248..000000000000 --- a/man/man8/zfsprops.8 +++ /dev/null @@ -1,2067 +0,0 @@ -.\" -.\" CDDL HEADER START -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" CDDL HEADER END -.\" -.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright 2011 Joshua M. Clulow -.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved. -.\" Copyright (c) 2011, Pawel Jakub Dawidek -.\" Copyright (c) 2012, Glen Barber -.\" Copyright (c) 2012, Bryan Drewery -.\" Copyright (c) 2013, Steven Hartland -.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. -.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. -.\" Copyright (c) 2014 by Adam Stevko. All rights reserved. -.\" Copyright (c) 2014 Integros [integros.com] -.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved. -.\" Copyright (c) 2014, Xin LI -.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved. -.\" Copyright 2019 Richard Laager. All rights reserved. -.\" Copyright 2018 Nexenta Systems, Inc. -.\" Copyright 2019 Joyent, Inc. -.\" Copyright (c) 2019, Kjeld Schouten-Lebbing -.\" -.Dd May 24, 2021 -.Dt ZFSPROPS 8 -.Os -. -.Sh NAME -.Nm zfsprops -.Nd native and user-defined properties of ZFS datasets -. -.Sh DESCRIPTION -Properties are divided into two types, native properties and user-defined -.Po or -.Qq user -.Pc -properties. -Native properties either export internal statistics or control ZFS behavior. -In addition, native properties are either editable or read-only. -User properties have no effect on ZFS behavior, but you can use them to annotate -datasets in a way that is meaningful in your environment. -For more information about user properties, see the -.Sx User Properties -section, below. -. -.Ss Native Properties -Every dataset has a set of properties that export statistics about the dataset -as well as control various behaviors. -Properties are inherited from the parent unless overridden by the child. -Some properties apply only to certain types of datasets -.Pq file systems, volumes, or snapshots . -.Pp -The values of numeric properties can be specified using human-readable suffixes -.Po for example, -.Sy k , -.Sy KB , -.Sy M , -.Sy Gb , -and so forth, up to -.Sy Z -for zettabyte -.Pc . -The following are all valid -.Pq and equal -specifications: -.Li 1536M, 1.5g, 1.50GB . -.Pp -The values of non-numeric properties are case sensitive and must be lowercase, -except for -.Sy mountpoint , -.Sy sharenfs , -and -.Sy sharesmb . -.Pp -The following native properties consist of read-only statistics about the -dataset. -These properties can be neither set, nor inherited. -Native properties apply to all dataset types unless otherwise noted. -.Bl -tag -width "usedbyrefreservation" -.It Sy available -The amount of space available to the dataset and all its children, assuming that -there is no other activity in the pool. -Because space is shared within a pool, availability can be limited by any number -of factors, including physical pool size, quotas, reservations, or other -datasets within the pool. -.Pp -This property can also be referred to by its shortened column name, -.Sy avail . -.It Sy compressratio -For non-snapshots, the compression ratio achieved for the -.Sy used -space of this dataset, expressed as a multiplier. -The -.Sy used -property includes descendant datasets, and, for clones, does not include the -space shared with the origin snapshot. 
-For snapshots, the -.Sy compressratio -is the same as the -.Sy refcompressratio -property. -Compression can be turned on by running: -.Nm zfs Cm set Sy compression Ns = Ns Sy on Ar dataset . -The default value is -.Sy off . -.It Sy createtxg -The transaction group (txg) in which the dataset was created. -Bookmarks have the same -.Sy createtxg -as the snapshot they are initially tied to. -This property is suitable for ordering a list of snapshots, -e.g. for incremental send and receive. -.It Sy creation -The time this dataset was created. -.It Sy clones -For snapshots, this property is a comma-separated list of filesystems or volumes -which are clones of this snapshot. -The clones' -.Sy origin -property is this snapshot. -If the -.Sy clones -property is not empty, then this snapshot can not be destroyed -.Po even with the -.Fl r -or -.Fl f -options -.Pc . -The roles of origin and clone can be swapped by promoting the clone with the -.Nm zfs Cm promote -command. -.It Sy defer_destroy -This property is -.Sy on -if the snapshot has been marked for deferred destroy by using the -.Nm zfs Cm destroy Fl d -command. -Otherwise, the property is -.Sy off . -.It Sy encryptionroot -For encrypted datasets, indicates where the dataset is currently inheriting its -encryption key from. -Loading or unloading a key for the -.Sy encryptionroot -will implicitly load / unload the key for any inheriting datasets (see -.Nm zfs Cm load-key -and -.Nm zfs Cm unload-key -for details). -Clones will always share an -encryption key with their origin. -See the -.Sx Encryption -section of -.Xr zfs-load-key 8 -for details. -.It Sy filesystem_count -The total number of filesystems and volumes that exist under this location in -the dataset tree. -This value is only available when a -.Sy filesystem_limit -has been set somewhere in the tree under which the dataset resides. -.It Sy keystatus -Indicates if an encryption key is currently loaded into ZFS. -The possible values are -.Sy none , -.Sy available , -and -.Sy unavailable . -See -.Nm zfs Cm load-key -and -.Nm zfs Cm unload-key . -.It Sy guid -The 64 bit GUID of this dataset or bookmark which does not change over its -entire lifetime. -When a snapshot is sent to another pool, the received snapshot has the same GUID. -Thus, the -.Sy guid -is suitable to identify a snapshot across pools. -.It Sy logicalreferenced -The amount of space that is -.Qq logically -accessible by this dataset. -See the -.Sy referenced -property. -The logical space ignores the effect of the -.Sy compression -and -.Sy copies -properties, giving a quantity closer to the amount of data that applications -see. -However, it does include space consumed by metadata. -.Pp -This property can also be referred to by its shortened column name, -.Sy lrefer . -.It Sy logicalused -The amount of space that is -.Qq logically -consumed by this dataset and all its descendents. -See the -.Sy used -property. -The logical space ignores the effect of the -.Sy compression -and -.Sy copies -properties, giving a quantity closer to the amount of data that applications -see. -However, it does include space consumed by metadata. -.Pp -This property can also be referred to by its shortened column name, -.Sy lused . -.It Sy mounted -For file systems, indicates whether the file system is currently mounted. -This property can be either -.Sy yes -or -.Sy no . -.It Sy objsetid -A unique identifier for this dataset within the pool. 
-Unlike the dataset's -.Sy guid , No the Sy objsetid -of a dataset is not transferred to other pools when the snapshot is copied -with a send/receive operation. -The -.Sy objsetid -can be reused (for a new dataset) after the dataset is deleted. -.It Sy origin -For cloned file systems or volumes, the snapshot from which the clone was -created. -See also the -.Sy clones -property. -.It Sy receive_resume_token -For filesystems or volumes which have saved partially-completed state from -.Nm zfs Cm receive Fl s , -this opaque token can be provided to -.Nm zfs Cm send Fl t -to resume and complete the -.Nm zfs Cm receive . -.It Sy redact_snaps -For bookmarks, this is the list of snapshot guids the bookmark contains a redaction -list for. -For snapshots, this is the list of snapshot guids the snapshot is redacted with -respect to. -.It Sy referenced -The amount of data that is accessible by this dataset, which may or may not be -shared with other datasets in the pool. -When a snapshot or clone is created, it initially references the same amount of -space as the file system or snapshot it was created from, since its contents are -identical. -.Pp -This property can also be referred to by its shortened column name, -.Sy refer . -.It Sy refcompressratio -The compression ratio achieved for the -.Sy referenced -space of this dataset, expressed as a multiplier. -See also the -.Sy compressratio -property. -.It Sy snapshot_count -The total number of snapshots that exist under this location in the dataset -tree. -This value is only available when a -.Sy snapshot_limit -has been set somewhere in the tree under which the dataset resides. -.It Sy type -The type of dataset: -.Sy filesystem , -.Sy volume , -.Sy snapshot , -or -.Sy bookmark . -.It Sy used -The amount of space consumed by this dataset and all its descendents. -This is the value that is checked against this dataset's quota and reservation. -The space used does not include this dataset's reservation, but does take into -account the reservations of any descendent datasets. -The amount of space that a dataset consumes from its parent, as well as the -amount of space that is freed if this dataset is recursively destroyed, is the -greater of its space used and its reservation. -.Pp -The used space of a snapshot -.Po see the -.Sx Snapshots -section of -.Xr zfsconcepts 8 -.Pc -is space that is referenced exclusively by this snapshot. -If this snapshot is destroyed, the amount of -.Sy used -space will be freed. -Space that is shared by multiple snapshots isn't accounted for in this metric. -When a snapshot is destroyed, space that was previously shared with this -snapshot can become unique to snapshots adjacent to it, thus changing the used -space of those snapshots. -The used space of the latest snapshot can also be affected by changes in the -file system. -Note that the -.Sy used -space of a snapshot is a subset of the -.Sy written -space of the snapshot. -.Pp -The amount of space used, available, or referenced does not take into account -pending changes. -Pending changes are generally accounted for within a few seconds. -Committing a change to a disk using -.Xr fsync 2 -or -.Sy O_SYNC -does not necessarily guarantee that the space usage information is updated -immediately. -.It Sy usedby* -The -.Sy usedby* -properties decompose the -.Sy used -properties into the various reasons that space is used. -Specifically, -.Sy used No = -.Sy usedbychildren No + -.Sy usedbydataset No + -.Sy usedbyrefreservation No + -.Sy usedbysnapshots . 
-These properties are only available for datasets created on -.Nm zpool -.Qo version 13 Qc -pools. -.It Sy usedbychildren -The amount of space used by children of this dataset, which would be freed if -all the dataset's children were destroyed. -.It Sy usedbydataset -The amount of space used by this dataset itself, which would be freed if the -dataset were destroyed -.Po after first removing any -.Sy refreservation -and destroying any necessary snapshots or descendents -.Pc . -.It Sy usedbyrefreservation -The amount of space used by a -.Sy refreservation -set on this dataset, which would be freed if the -.Sy refreservation -was removed. -.It Sy usedbysnapshots -The amount of space consumed by snapshots of this dataset. -In particular, it is the amount of space that would be freed if all of this -dataset's snapshots were destroyed. -Note that this is not simply the sum of the snapshots' -.Sy used -properties because space can be shared by multiple snapshots. -.It Sy userused Ns @ Ns Ar user -The amount of space consumed by the specified user in this dataset. -Space is charged to the owner of each file, as displayed by -.Nm ls Fl l . -The amount of space charged is displayed by -.Nm du No and Nm ls Fl s . -See the -.Nm zfs Cm userspace -command for more information. -.Pp -Unprivileged users can access only their own space usage. -The root user, or a user who has been granted the -.Sy userused -privilege with -.Nm zfs Cm allow , -can access everyone's usage. -.Pp -The -.Sy userused Ns @ Ns Ar ... -properties are not displayed by -.Nm zfs Cm get Sy all . -The user's name must be appended after the -.Sy @ -symbol, using one of the following forms: -.Bl -bullet -compact -offset 4n -.It -POSIX name -.Pq Qq joe -.It -POSIX numeric ID -.Pq Qq 789 -.It -SID name -.Pq Qq joe.smith@mydomain -.It -SID numeric ID -.Pq Qq S-1-123-456-789 -.El -.Pp -Files created on Linux always have POSIX owners. -.It Sy userobjused Ns @ Ns Ar user -The -.Sy userobjused -property is similar to -.Sy userused -but instead it counts the number of objects consumed by a user. -This property counts all objects allocated on behalf of the user, -it may differ from the results of system tools such as -.Nm df Fl i . -.Pp -When the property -.Sy xattr Ns = Ns Sy on -is set on a file system additional objects will be created per-file to store -extended attributes. -These additional objects are reflected in the -.Sy userobjused -value and are counted against the user's -.Sy userobjquota . -When a file system is configured to use -.Sy xattr Ns = Ns Sy sa -no additional internal objects are normally required. -.It Sy userrefs -This property is set to the number of user holds on this snapshot. -User holds are set by using the -.Nm zfs Cm hold -command. -.It Sy groupused Ns @ Ns Ar group -The amount of space consumed by the specified group in this dataset. -Space is charged to the group of each file, as displayed by -.Nm ls Fl l . -See the -.Sy userused Ns @ Ns Ar user -property for more information. -.Pp -Unprivileged users can only access their own groups' space usage. -The root user, or a user who has been granted the -.Sy groupused -privilege with -.Nm zfs Cm allow , -can access all groups' usage. -.It Sy groupobjused Ns @ Ns Ar group -The number of objects consumed by the specified group in this dataset. -Multiple objects may be charged to the group for each file when extended -attributes are in use. -See the -.Sy userobjused Ns @ Ns Ar user -property for more information. 
-.Pp -Unprivileged users can only access their own groups' space usage. -The root user, or a user who has been granted the -.Sy groupobjused -privilege with -.Nm zfs Cm allow , -can access all groups' usage. -.It Sy projectused Ns @ Ns Ar project -The amount of space consumed by the specified project in this dataset. -Project is identified via the project identifier (ID) that is object-based -numeral attribute. -An object can inherit the project ID from its parent object (if the -parent has the flag of inherit project ID that can be set and changed via -.Nm chattr Fl /+P -or -.Nm zfs project Fl s ) -when being created. -The privileged user can set and change object's project -ID via -.Nm chattr Fl p -or -.Nm zfs project Fl s -anytime. -Space is charged to the project of each file, as displayed by -.Nm lsattr Fl p -or -.Nm zfs project . -See the -.Sy userused Ns @ Ns Ar user -property for more information. -.Pp -The root user, or a user who has been granted the -.Sy projectused -privilege with -.Nm zfs allow , -can access all projects' usage. -.It Sy projectobjused Ns @ Ns Ar project -The -.Sy projectobjused -is similar to -.Sy projectused -but instead it counts the number of objects consumed by project. -When the property -.Sy xattr Ns = Ns Sy on -is set on a fileset, ZFS will create additional objects per-file to store -extended attributes. -These additional objects are reflected in the -.Sy projectobjused -value and are counted against the project's -.Sy projectobjquota . -When a filesystem is configured to use -.Sy xattr Ns = Ns Sy sa -no additional internal objects are required. -See the -.Sy userobjused Ns @ Ns Ar user -property for more information. -.Pp -The root user, or a user who has been granted the -.Sy projectobjused -privilege with -.Nm zfs allow , -can access all projects' objects usage. -.It Sy volblocksize -For volumes, specifies the block size of the volume. -The -.Sy blocksize -cannot be changed once the volume has been written, so it should be set at -volume creation time. -The default -.Sy blocksize -for volumes is 8 Kbytes. -Any power of 2 from 512 bytes to 128 Kbytes is valid. -.Pp -This property can also be referred to by its shortened column name, -.Sy volblock . -.It Sy written -The amount of space -.Sy referenced -by this dataset, that was written since the previous snapshot -.Pq i.e. that is not referenced by the previous snapshot . -.It Sy written Ns @ Ns Ar snapshot -The amount of -.Sy referenced -space written to this dataset since the specified snapshot. -This is the space that is referenced by this dataset but was not referenced by -the specified snapshot. -.Pp -The -.Ar snapshot -may be specified as a short snapshot name -.Pq just the part after the Sy @ , -in which case it will be interpreted as a snapshot in the same filesystem as -this dataset. -The -.Ar snapshot -may be a full snapshot name -.Pq Ar filesystem Ns @ Ns Ar snapshot , -which for clones may be a snapshot in the origin's filesystem -.Pq or the origin of the origin's filesystem, etc. -.El -.Pp -The following native properties can be used to change the behavior of a ZFS -dataset. -.Bl -tag -width "" -.It Xo -.Sy aclinherit Ns = Ns Sy discard Ns | Ns Sy noallow Ns | Ns -.Sy restricted Ns | Ns Sy passthrough Ns | Ns Sy passthrough-x -.Xc -Controls how ACEs are inherited when files and directories are created. -.Bl -tag -compact -offset 4n -width "passthrough-x" -.It Sy discard -does not inherit any ACEs. -.It Sy noallow -only inherits inheritable ACEs that specify -.Qq deny -permissions. 
-.It Sy restricted -default, removes the -.Sy write_acl -and -.Sy write_owner -permissions when the ACE is inherited. -.It Sy passthrough -inherits all inheritable ACEs without any modifications. -.It Sy passthrough-x -same meaning as -.Sy passthrough , -except that the -.Sy owner@ , group@ , No and Sy everyone@ -ACEs inherit the execute permission only if the file creation mode also requests -the execute bit. -.El -.Pp -When the property value is set to -.Sy passthrough , -files are created with a mode determined by the inheritable ACEs. -If no inheritable ACEs exist that affect the mode, then the mode is set in -accordance to the requested mode from the application. -.Pp -The -.Sy aclinherit -property does not apply to POSIX ACLs. -.It Xo -.Sy aclmode Ns = Ns Sy discard Ns | Ns Sy groupmask Ns | Ns -.Sy passthrough Ns | Ns Sy restricted Ns -.Xc -Controls how an ACL is modified during chmod(2) and how inherited ACEs -are modified by the file creation mode: -.Bl -tag -compact -offset 4n -width "passthrough" -.It Sy discard -default, deletes all -.Sy ACEs -except for those representing -the mode of the file or directory requested by -.Xr chmod 2 . -.It Sy groupmask -reduces permissions granted in all -.Sy ALLOW -entries found in the -.Sy ACL -such that they are no greater than the group permissions specified by -.Xr chmod 2 . -.It Sy passthrough -indicates that no changes are made to the ACL other than creating or updating -the necessary ACL entries to represent the new mode of the file or directory. -.It Sy restricted -will cause the -.Xr chmod 2 -operation to return an error when used on any file or directory which has -a non-trivial ACL whose entries can not be represented by a mode. -.Xr chmod 2 -is required to change the set user ID, set group ID, or sticky bits on a file -or directory, as they do not have equivalent ACL entries. -In order to use -.Xr chmod 2 -on a file or directory with a non-trivial ACL when -.Sy aclmode -is set to -.Sy restricted , -you must first remove all ACL entries which do not represent the current mode. -.El -.It Sy acltype Ns = Ns Sy off Ns | Ns Sy nfsv4 Ns | Ns Sy posix -Controls whether ACLs are enabled and if so what type of ACL to use. -When this property is set to a type of ACL not supported by the current -platform, the behavior is the same as if it were set to -.Sy off . -.Bl -tag -compact -offset 4n -width "posixacl" -.It Sy off -default on Linux, when a file system has the -.Sy acltype -property set to off then ACLs are disabled. -.It Sy noacl -an alias for -.Sy off -.It Sy nfsv4 -default on -.Fx , -indicates that NFSv4-style ZFS ACLs should be used. -These ACLs can be managed with the -.Xr getfacl 1 -and -.Xr setfacl 1 . -The -.Sy nfsv4 -ZFS ACL type is not yet supported on Linux. -.It Sy posix -indicates POSIX ACLs should be used. -POSIX ACLs are specific to Linux and are not functional on other platforms. -POSIX ACLs are stored as an extended -attribute and therefore will not overwrite any existing NFSv4 ACLs which -may be set. -.It Sy posixacl -an alias for -.Sy posix -.El -.Pp -To obtain the best performance when setting -.Sy posix -users are strongly encouraged to set the -.Sy xattr Ns = Ns Sy sa -property. -This will result in the POSIX ACL being stored more efficiently on disk. -But as a consequence, all new extended attributes will only be -accessible from OpenZFS implementations which support the -.Sy xattr Ns = Ns Sy sa -property. -See the -.Sy xattr -property for more details. 
-.It Sy atime Ns = Ns Sy on Ns | Ns Sy off -Controls whether the access time for files is updated when they are read. -Turning this property off avoids producing write traffic when reading files and -can result in significant performance gains, though it might confuse mailers -and other similar utilities. -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy atime -and -.Sy noatime -mount options. -The default value is -.Sy on . -See also -.Sy relatime -below. -.It Sy canmount Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy noauto -If this property is set to -.Sy off , -the file system cannot be mounted, and is ignored by -.Nm zfs Cm mount Fl a . -Setting this property to -.Sy off -is similar to setting the -.Sy mountpoint -property to -.Sy none , -except that the dataset still has a normal -.Sy mountpoint -property, which can be inherited. -Setting this property to -.Sy off -allows datasets to be used solely as a mechanism to inherit properties. -One example of setting -.Sy canmount Ns = Ns Sy off -is to have two datasets with the same -.Sy mountpoint , -so that the children of both datasets appear in the same directory, but might -have different inherited characteristics. -.Pp -When set to -.Sy noauto , -a dataset can only be mounted and unmounted explicitly. -The dataset is not mounted automatically when the dataset is created or -imported, nor is it mounted by the -.Nm zfs Cm mount Fl a -command or unmounted by the -.Nm zfs Cm unmount Fl a -command. -.Pp -This property is not inherited. -.It Xo -.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns -.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns -.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr -.Xc -Controls the checksum used to verify data integrity. -The default value is -.Sy on , -which automatically selects an appropriate algorithm -.Po currently, -.Sy fletcher4 , -but this may change in future releases -.Pc . -The value -.Sy off -disables integrity checking on user data. -The value -.Sy noparity -not only disables integrity but also disables maintaining parity for user data. -This setting is used internally by a dump device residing on a RAID-Z pool and -should not be used by any other dataset. -Disabling checksums is -.Em NOT -a recommended practice. -.Pp -The -.Sy sha512 , -.Sy skein , -and -.Sy edonr -checksum algorithms require enabling the appropriate features on the pool. -.Fx -does not support the -.Sy edonr -algorithm. -.Pp -Please see -.Xr zpool-features 5 -for more information on these algorithms. -.Pp -Changing this property affects only newly-written data. -.It Xo -.Sy compression Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy gzip Ns | Ns -.Sy gzip- Ns Ar N Ns | Ns Sy lz4 Ns | Ns Sy lzjb Ns | Ns Sy zle Ns | Ns Sy zstd Ns | Ns -.Sy zstd- Ns Ar N Ns | Ns Sy zstd-fast Ns | Ns Sy zstd-fast- Ns Ar N -.Xc -Controls the compression algorithm used for this dataset. -.Pp -Setting compression to -.Sy on -indicates that the current default compression algorithm should be used. -The default balances compression and decompression speed, with compression ratio -and is expected to work well on a wide variety of workloads. -Unlike all other settings for this property, -.Sy on -does not select a fixed compression type. -As new compression algorithms are added to ZFS and enabled on a pool, the -default compression algorithm may change. -The current default compression algorithm is either -.Sy lzjb -or, if the -.Sy lz4_compress -feature is enabled, -.Sy lz4 . 
-.Pp -The -.Sy lz4 -compression algorithm is a high-performance replacement for the -.Sy lzjb -algorithm. -It features significantly faster compression and decompression, as well as a -moderately higher compression ratio than -.Sy lzjb , -but can only be used on pools with the -.Sy lz4_compress -feature set to -.Sy enabled . -See -.Xr zpool-features 5 -for details on ZFS feature flags and the -.Sy lz4_compress -feature. -.Pp -The -.Sy lzjb -compression algorithm is optimized for performance while providing decent data -compression. -.Pp -The -.Sy gzip -compression algorithm uses the same compression as the -.Xr gzip 1 -command. -You can specify the -.Sy gzip -level by using the value -.Sy gzip- Ns Ar N , -where -.Ar N -is an integer from 1 -.Pq fastest -to 9 -.Pq best compression ratio . -Currently, -.Sy gzip -is equivalent to -.Sy gzip-6 -.Po which is also the default for -.Xr gzip 1 -.Pc . -.Pp -The -.Sy zstd -compression algorithm provides both high compression ratios and good performance. -You can specify the -.Sy zstd -level by using the value -.Sy zstd- Ns Ar N , -where -.Ar N -is an integer from 1 -.Pq fastest -to 19 -.Pq best compression ratio . -.Sy zstd -is equivalent to -.Sy zstd-3 . -.Pp -Faster speeds at the cost of the compression ratio can be requested by -setting a negative -.Sy zstd -level. -This is done using -.Sy zstd-fast- Ns Ar N , -where -.Ar N -is an integer in [1-9,10,20,30,...,100,500,1000] which maps to a negative -.Sy zstd -level. -The lower the level the faster the compression - -.Ar 1000 No provides the fastest compression and lowest compression ratio. -.Sy zstd-fast -is equivalent to -.Sy zstd-fast-1 . -.Pp -The -.Sy zle -compression algorithm compresses runs of zeros. -.Pp -This property can also be referred to by its shortened column name -.Sy compress . -Changing this property affects only newly-written data. -.Pp -When any setting except -.Sy off -is selected, compression will explicitly check for blocks consisting of only -zeroes (the NUL byte). -When a zero-filled block is detected, it is stored as -a hole and not compressed using the indicated compression algorithm. -.Pp -Any block being compressed must be no larger than 7/8 of its original size -after compression, otherwise the compression will not be considered worthwhile -and the block saved uncompressed. -Note that when the logical block is less than -8 times the disk sector size this effectively reduces the necessary compression -ratio; for example, 8kB blocks on disks with 4kB disk sectors must compress to 1/2 -or less of their original size. -.It Xo -.Sy context Ns = Ns Sy none Ns | Ns -.Ar SELinux-User : Ns Ar SElinux-Role : Ns Ar Selinux-Type : Ns Ar Sensitivity-Level -.Xc -This flag sets the SELinux context for all files in the file system under -a mount point for that file system. -See -.Xr selinux 8 -for more information. -.It Xo -.Sy fscontext Ns = Ns Sy none Ns | Ns -.Ar SELinux-User : Ns Ar SElinux-Role : Ns Ar Selinux-Type : Ns Ar Sensitivity-Level -.Xc -This flag sets the SELinux context for the file system file system being -mounted. -See -.Xr selinux 8 -for more information. -.It Xo -.Sy defcontext Ns = Ns Sy none Ns | Ns -.Ar SELinux-User : Ns Ar SElinux-Role : Ns Ar Selinux-Type : Ns Ar Sensitivity-Level -.Xc -This flag sets the SELinux default context for unlabeled files. -See -.Xr selinux 8 -for more information. 
-.It Xo -.Sy rootcontext Ns = Ns Sy none Ns | Ns -.Ar SELinux-User : Ns Ar SElinux-Role : Ns Ar Selinux-Type : Ns Ar Sensitivity-Level -.Xc -This flag sets the SELinux context for the root inode of the file system. -See -.Xr selinux 8 -for more information. -.It Sy copies Ns = Ns Sy 1 Ns | Ns Sy 2 Ns | Ns Sy 3 -Controls the number of copies of data stored for this dataset. -These copies are in addition to any redundancy provided by the pool, for -example, mirroring or RAID-Z. -The copies are stored on different disks, if possible. -The space used by multiple copies is charged to the associated file and dataset, -changing the -.Sy used -property and counting against quotas and reservations. -.Pp -Changing this property only affects newly-written data. -Therefore, set this property at file system creation time by using the -.Fl o Sy copies Ns = Ns Ar N -option. -.Pp -Remember that ZFS will not import a pool with a missing top-level vdev. -Do -.Em NOT -create, for example a two-disk striped pool and set -.Sy copies Ns = Ns Ar 2 -on some datasets thinking you have setup redundancy for them. -When a disk fails you will not be able to import the pool -and will have lost all of your data. -.Pp -Encrypted datasets may not have -.Sy copies Ns = Ns Ar 3 -since the implementation stores some encryption metadata where the third copy -would normally be. -.It Sy devices Ns = Ns Sy on Ns | Ns Sy off -Controls whether device nodes can be opened on this file system. -The default value is -.Sy on . -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy dev -and -.Sy nodev -mount options. -.It Xo -.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns -.Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns -.Sy edonr , Ns Sy verify -.Xc -Configures deduplication for a dataset. -The default value is -.Sy off . -The default deduplication checksum is -.Sy sha256 -(this may change in the future). -When -.Sy dedup -is enabled, the checksum defined here overrides the -.Sy checksum -property. -Setting the value to -.Sy verify -has the same effect as the setting -.Sy sha256 , Ns Sy verify . -.Pp -If set to -.Sy verify , -ZFS will do a byte-to-byte comparison in case of two blocks having the same -signature to make sure the block contents are identical. -Specifying -.Sy verify -is mandatory for the -.Sy edonr -algorithm. -.Pp -Unless necessary, deduplication should -.Em not -be enabled on a system. -See the -.Sx Deduplication -section of -.Xr zfsconcepts 8 . -.It Xo -.Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns -.Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k -.Xc -Specifies a compatibility mode or literal value for the size of dnodes in the -file system. -The default value is -.Sy legacy . -Setting this property to a value other than -.Sy legacy No requires the Sy large_dnode No pool feature to be enabled. -.Pp -Consider setting -.Sy dnodesize -to -.Sy auto -if the dataset uses the -.Sy xattr Ns = Ns Sy sa -property setting and the workload makes heavy use of extended attributes. -This -may be applicable to SELinux-enabled systems, Lustre servers, and Samba -servers, for example. -Literal values are supported for cases where the optimal -size is known in advance and for performance testing. 
-.Pp -Leave -.Sy dnodesize -set to -.Sy legacy -if you need to receive a send stream of this dataset on a pool that doesn't -enable the -.Sy large_dnode -feature, or if you need to import this pool on a system that doesn't support the -.Sy large_dnode No feature. -.Pp -This property can also be referred to by its shortened column name, -.Sy dnsize . -.It Xo -.Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns -.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns -.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm -.Xc -Controls the encryption cipher suite (block cipher, key length, and mode) used -for this dataset. -Requires the -.Sy encryption -feature to be enabled on the pool. -Requires a -.Sy keyformat -to be set at dataset creation time. -.Pp -Selecting -.Sy encryption Ns = Ns Sy on -when creating a dataset indicates that the default encryption suite will be -selected, which is currently -.Sy aes-256-gcm . -In order to provide consistent data protection, encryption must be specified at -dataset creation time and it cannot be changed afterwards. -.Pp -For more details and caveats about encryption see the -.Sx Encryption -section of -.Xr zfs-load-key 8 . -.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase -Controls what format the user's encryption key will be provided as. -This property is only set when the dataset is encrypted. -.Pp -Raw keys and hex keys must be 32 bytes long (regardless of the chosen -encryption suite) and must be randomly generated. -A raw key can be generated with the following command: -.Dl # Nm dd Sy if=/dev/urandom bs=32 count=1 Sy of= Ns Pa /path/to/output/key -.Pp -Passphrases must be between 8 and 512 bytes long and will be processed through -PBKDF2 before being used (see the -.Sy pbkdf2iters -property). -Even though the encryption suite cannot be changed after dataset creation, -the keyformat can be with -.Nm zfs Cm change-key . -.It Xo -.Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Ar /absolute/file/path Ns | Ns Sy https:// Ns Ar address Ns | Ns Sy http:// Ns Ar address -.Xc -Controls where the user's encryption key will be loaded from by default for -commands such as -.Nm zfs Cm load-key -and -.Nm zfs Cm mount Fl l . -This property is only set for encrypted datasets which are encryption roots. -If unspecified, the default is -.Sy prompt . -.Pp -Even though the encryption suite cannot be changed after dataset creation, the -keylocation can be with either -.Nm zfs Cm set -or -.Nm zfs Cm change-key . -If -.Sy prompt -is selected ZFS will ask for the key at the command prompt when it is required -to access the encrypted data (see -.Nm zfs Cm load-key -for details). -This setting will also allow the key to be passed in via the standard input stream, -but users should be careful not to place keys which should be kept secret on -the command line. -If a file URI is selected, the key will be loaded from the -specified absolute file path. -If an HTTPS or HTTP URL is selected, it will be GETted using -.Xr fetch 3 , -libcurl, or nothing, depending on compile-time configuration and run-time availability. -The -.Sy SSL_CA_CERT_FILE -environment variable can be set to set the location -of the concatenated certificate store. -The -.Sy SSL_CA_CERT_PATH -environment variable can be set to override the location -of the directory containing the certificate authority bundle. 
-The -.Sy SSL_CLIENT_CERT_FILE -and -.Sy SSL_CLIENT_KEY_FILE -environment variables can be set to configure the path -to the client certificate and its key. -.It Sy pbkdf2iters Ns = Ns Ar iterations -Controls the number of PBKDF2 iterations that a -.Sy passphrase -encryption key should be run through when processing it into an encryption key. -This property is only defined when encryption is enabled and a keyformat of -.Sy passphrase -is selected. -The goal of PBKDF2 is to significantly increase the -computational difficulty needed to brute force a user's passphrase. -This is accomplished by forcing the attacker to run each passphrase through a -computationally expensive hashing function many times before they arrive at the -resulting key. -A user who actually knows the passphrase will only have to pay this cost once. -As CPUs become better at processing, this number should be -raised to ensure that a brute force attack is still not possible. -The current default is -.Sy 350000 -and the minimum is -.Sy 100000 . -This property may be changed with -.Nm zfs Cm change-key . -.It Sy exec Ns = Ns Sy on Ns | Ns Sy off -Controls whether processes can be executed from within this file system. -The default value is -.Sy on . -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy exec -and -.Sy noexec -mount options. -.It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none -Limits the number of filesystems and volumes that can exist under this point in -the dataset tree. -The limit is not enforced if the user is allowed to change the limit. -Setting a -.Sy filesystem_limit -to -.Sy on -a descendent of a filesystem that already has a -.Sy filesystem_limit -does not override the ancestor's -.Sy filesystem_limit , -but rather imposes an additional limit. -This feature must be enabled to be used -.Po see -.Xr zpool-features 5 -.Pc . -.It Sy special_small_blocks Ns = Ns Ar size -This value represents the threshold block size for including small file -blocks into the special allocation class. -Blocks smaller than or equal to this -value will be assigned to the special allocation class while greater blocks -will be assigned to the regular class. -Valid values are zero or a power of two from 512B up to 1M. -The default size is 0 which means no small file blocks -will be allocated in the special class. -.Pp -Before setting this property, a special class vdev must be added to the -pool. -See -.Xr zpoolconcepts 8 -for more details on the special allocation class. -.It Sy mountpoint Ns = Ns Pa path Ns | Ns Sy none Ns | Ns Sy legacy -Controls the mount point used for this file system. -See the -.Sx Mount Points -section of -.Xr zfsconcepts 8 -for more information on how this property is used. -.Pp -When the -.Sy mountpoint -property is changed for a file system, the file system and any children that -inherit the mount point are unmounted. -If the new value is -.Sy legacy , -then they remain unmounted. -Otherwise, they are automatically remounted in the new location if the property -was previously -.Sy legacy -or -.Sy none , -or if they were mounted before the property was changed. -In addition, any shared file systems are unshared and shared in the new -location. -.It Sy nbmand Ns = Ns Sy on Ns | Ns Sy off -Controls whether the file system should be mounted with -.Sy nbmand -.Pq Non-blocking mandatory locks . -This is used for SMB clients. -Changes to this property only take effect when the file system is umounted and -remounted. -Support for these locks is scarce and not described by POSIX. 
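Pulling the encryption, keyformat, and keylocation properties above into one hedged walkthrough (the key path is the placeholder from the text; the dataset name is hypothetical):

    # Generate a 32-byte raw key, per the keyformat description:
    dd if=/dev/urandom bs=32 count=1 of=/path/to/output/key
    # The cipher suite must be chosen at dataset creation time:
    zfs create -o encryption=on -o keyformat=raw \
        -o keylocation=file:///path/to/output/key tank/secure
    # The key location (unlike the suite) may be changed later:
    zfs set keylocation=prompt tank/secure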
-.It Sy overlay Ns = Ns Sy on Ns | Ns Sy off -Allow mounting on a busy directory or a directory which already contains -files or directories. -This is the default mount behavior for Linux and -.Fx -file systems. -On these platforms the property is -.Sy on -by default. -Set to -.Sy off -to disable overlay mounts for consistency with OpenZFS on other platforms. -.It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata -Controls what is cached in the primary cache -.Pq ARC . -If this property is set to -.Sy all , -then both user data and metadata is cached. -If this property is set to -.Sy none , -then neither user data nor metadata is cached. -If this property is set to -.Sy metadata , -then only metadata is cached. -The default value is -.Sy all . -.It Sy quota Ns = Ns Ar size Ns | Ns Sy none -Limits the amount of space a dataset and its descendents can consume. -This property enforces a hard limit on the amount of space used. -This includes all space consumed by descendents, including file systems and -snapshots. -Setting a quota on a descendent of a dataset that already has a quota does not -override the ancestor's quota, but rather imposes an additional limit. -.Pp -Quotas cannot be set on volumes, as the -.Sy volsize -property acts as an implicit quota. -.It Sy snapshot_limit Ns = Ns Ar count Ns | Ns Sy none -Limits the number of snapshots that can be created on a dataset and its -descendents. -Setting a -.Sy snapshot_limit -on a descendent of a dataset that already has a -.Sy snapshot_limit -does not override the ancestor's -.Sy snapshot_limit , -but rather imposes an additional limit. -The limit is not enforced if the user is allowed to change the limit. -For example, this means that recursive snapshots taken from the global zone are -counted against each delegated dataset within a zone. -This feature must be enabled to be used -.Po see -.Xr zpool-features 5 -.Pc . -.It Sy userquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none -Limits the amount of space consumed by the specified user. -User space consumption is identified by the -.Sy userspace@ Ns Ar user -property. -.Pp -Enforcement of user quotas may be delayed by several seconds. -This delay means that a user might exceed their quota before the system notices -that they are over quota and begins to refuse additional writes with the -.Er EDQUOT -error message. -See the -.Nm zfs Cm userspace -command for more information. -.Pp -Unprivileged users can only access their own groups' space usage. -The root user, or a user who has been granted the -.Sy userquota -privilege with -.Nm zfs Cm allow , -can get and set everyone's quota. -.Pp -This property is not available on volumes, on file systems before version 4, or -on pools before version 15. -The -.Sy userquota@ Ns Ar ... -properties are not displayed by -.Nm zfs Cm get Sy all . -The user's name must be appended after the -.Sy @ -symbol, using one of the following forms: -.Bl -bullet -compact -offset 4n -.It -POSIX name -.Pq Qq joe -.It -POSIX numeric ID -.Pq Qq 789 -.It -SID name -.Pq Qq joe.smith@mydomain -.It -SID numeric ID -.Pq Qq S-1-123-456-789 -.El -.Pp -Files created on Linux always have POSIX owners. -.It Sy userobjquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none -The -.Sy userobjquota -is similar to -.Sy userquota -but it limits the number of objects a user can create. -Please refer to -.Sy userobjused -for more information about how objects are counted. 
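A brief sketch of how the quota family above composes (user and dataset names hypothetical):

    # Tree-wide hard limit, plus tighter per-user space and object limits:
    zfs set quota=100G tank/home
    zfs set userquota@joe=10G tank/home
    zfs set userobjquota@joe=500000 tank/home
    # Inspect per-user consumption, as referenced above:
    zfs userspace tank/home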
-.It Sy groupquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none -Limits the amount of space consumed by the specified group. -Group space consumption is identified by the -.Sy groupused@ Ns Ar group -property. -.Pp -Unprivileged users can access only their own groups' space usage. -The root user, or a user who has been granted the -.Sy groupquota -privilege with -.Nm zfs Cm allow , -can get and set all groups' quotas. -.It Sy groupobjquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none -The -.Sy groupobjquota -is similar to -.Sy groupquota -but it limits number of objects a group can consume. -Please refer to -.Sy userobjused -for more information about how objects are counted. -.It Sy projectquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none -Limits the amount of space consumed by the specified project. -Project space consumption is identified by the -.Sy projectused@ Ns Ar project -property. -Please refer to -.Sy projectused -for more information about how project is identified and set/changed. -.Pp -The root user, or a user who has been granted the -.Sy projectquota -privilege with -.Nm zfs allow , -can access all projects' quota. -.It Sy projectobjquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none -The -.Sy projectobjquota -is similar to -.Sy projectquota -but it limits number of objects a project can consume. -Please refer to -.Sy userobjused -for more information about how objects are counted. -.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off -Controls whether this dataset can be modified. -The default value is -.Sy off . -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy ro -and -.Sy rw -mount options. -.Pp -This property can also be referred to by its shortened column name, -.Sy rdonly . -.It Sy recordsize Ns = Ns Ar size -Specifies a suggested block size for files in the file system. -This property is designed solely for use with database workloads that access -files in fixed-size records. -ZFS automatically tunes block sizes according to internal algorithms optimized -for typical access patterns. -.Pp -For databases that create very large files but access them in small random -chunks, these algorithms may be suboptimal. -Specifying a -.Sy recordsize -greater than or equal to the record size of the database can result in -significant performance gains. -Use of this property for general purpose file systems is strongly discouraged, -and may adversely affect performance. -.Pp -The size specified must be a power of two greater than or equal to -.Ar 512B -and less than or equal to -.Ar 128kB . -If the -.Sy large_blocks -feature is enabled on the pool, the size may be up to -.Ar 1MB . -See -.Xr zpool-features 5 -for details on ZFS feature flags. -.Pp -Changing the file system's -.Sy recordsize -affects only files created afterward; existing files are unaffected. -.Pp -This property can also be referred to by its shortened column name, -.Sy recsize . -.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most -Controls what types of metadata are stored redundantly. -ZFS stores an extra copy of metadata, so that if a single block is corrupted, -the amount of user data lost is limited. -This extra copy is in addition to any redundancy provided at the pool level -.Pq e.g. by mirroring or RAID-Z , -and is in addition to an extra copy specified by the -.Sy copies -property -.Pq up to a total of 3 copies . 
-For example if the pool is mirrored, -.Sy copies Ns = Ns 2 , -and -.Sy redundant_metadata Ns = Ns Sy most , -then ZFS stores 6 copies of most metadata, and 4 copies of data and some -metadata. -.Pp -When set to -.Sy all , -ZFS stores an extra copy of all metadata. -If a single on-disk block is corrupt, at worst a single block of user data -.Po which is -.Sy recordsize -bytes long -.Pc -can be lost. -.Pp -When set to -.Sy most , -ZFS stores an extra copy of most types of metadata. -This can improve performance of random writes, because less metadata must be -written. -In practice, at worst about 100 blocks -.Po of -.Sy recordsize -bytes each -.Pc -of user data can be lost if a single on-disk block is corrupt. -The exact behavior of which metadata blocks are stored redundantly may change in -future releases. -.Pp -The default value is -.Sy all . -.It Sy refquota Ns = Ns Ar size Ns | Ns Sy none -Limits the amount of space a dataset can consume. -This property enforces a hard limit on the amount of space used. -This hard limit does not include space used by descendents, including file -systems and snapshots. -.It Sy refreservation Ns = Ns Ar size Ns | Ns Sy none Ns | Ns Sy auto -The minimum amount of space guaranteed to a dataset, not including its -descendents. -When the amount of space used is below this value, the dataset is treated as if -it were taking up the amount of space specified by -.Sy refreservation . -The -.Sy refreservation -reservation is accounted for in the parent datasets' space used, and counts -against the parent datasets' quotas and reservations. -.Pp -If -.Sy refreservation -is set, a snapshot is only allowed if there is enough free pool space outside of -this reservation to accommodate the current number of -.Qq referenced -bytes in the dataset. -.Pp -If -.Sy refreservation -is set to -.Sy auto , -a volume is thick provisioned -.Po or -.Qq not sparse -.Pc . -.Sy refreservation Ns = Ns Sy auto -is only supported on volumes. -See -.Sy volsize -in the -.Sx Native Properties -section for more information about sparse volumes. -.Pp -This property can also be referred to by its shortened column name, -.Sy refreserv . -.It Sy relatime Ns = Ns Sy on Ns | Ns Sy off -Controls the manner in which the access time is updated when -.Sy atime Ns = Ns Sy on -is set. -Turning this property on causes the access time to be updated relative -to the modify or change time. -Access time is only updated if the previous -access time was earlier than the current modify or change time or if the -existing access time hasn't been updated within the past 24 hours. -The default value is -.Sy off . -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy relatime -and -.Sy norelatime -mount options. -.It Sy reservation Ns = Ns Ar size Ns | Ns Sy none -The minimum amount of space guaranteed to a dataset and its descendants. -When the amount of space used is below this value, the dataset is treated as if -it were taking up the amount of space specified by its reservation. -Reservations are accounted for in the parent datasets' space used, and count -against the parent datasets' quotas and reservations. -.Pp -This property can also be referred to by its shortened column name, -.Sy reserv . -.It Sy secondarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata -Controls what is cached in the secondary cache -.Pq L2ARC . -If this property is set to -.Sy all , -then both user data and metadata is cached. -If this property is set to -.Sy none , -then neither user data nor metadata is cached. 
-If this property is set to -.Sy metadata , -then only metadata is cached. -The default value is -.Sy all . -.It Sy setuid Ns = Ns Sy on Ns | Ns Sy off -Controls whether the setuid bit is respected for the file system. -The default value is -.Sy on . -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy suid -and -.Sy nosuid -mount options. -.It Sy sharesmb Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts -Controls whether the file system is shared by using -.Sy Samba USERSHARES -and what options are to be used. -Otherwise, the file system is automatically shared and unshared with the -.Nm zfs Cm share -and -.Nm zfs Cm unshare -commands. -If the property is set to on, the -.Xr net 8 -command is invoked to create a -.Sy USERSHARE . -.Pp -Because SMB shares requires a resource name, a unique resource name is -constructed from the dataset name. -The constructed name is a copy of the -dataset name except that the characters in the dataset name, which would be -invalid in the resource name, are replaced with underscore (_) characters. -Linux does not currently support additional options which might be available -on Solaris. -.Pp -If the -.Sy sharesmb -property is set to -.Sy off , -the file systems are unshared. -.Pp -The share is created with the ACL (Access Control List) "Everyone:F" ("F" -stands for "full permissions", i.e. read and write permissions) and no guest -access (which means Samba must be able to authenticate a real user, system -passwd/shadow, LDAP or smbpasswd based) by default. -This means that any additional access control -(disallow specific user specific access etc) must be done on the underlying file system. -.It Sy sharenfs Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts -Controls whether the file system is shared via NFS, and what options are to be -used. -A file system with a -.Sy sharenfs -property of -.Sy off -is managed with the -.Xr exportfs 8 -command and entries in the -.Pa /etc/exports -file. -Otherwise, the file system is automatically shared and unshared with the -.Nm zfs Cm share -and -.Nm zfs Cm unshare -commands. -If the property is set to -.Sy on , -the dataset is shared using the default options: -.Dl sec=sys,rw,crossmnt,no_subtree_check -.Pp -See -.Xr exports 5 -for the meaning of the default options. -Otherwise, the -.Xr exportfs 8 -command is invoked with options equivalent to the contents of this property. -.Pp -When the -.Sy sharenfs -property is changed for a dataset, the dataset and any children inheriting the -property are re-shared with the new options, only if the property was previously -.Sy off , -or if they were shared before the property was changed. -If the new property is -.Sy off , -the file systems are unshared. -.It Sy logbias Ns = Ns Sy latency Ns | Ns Sy throughput -Provide a hint to ZFS about handling of synchronous requests in this dataset. -If -.Sy logbias -is set to -.Sy latency -.Pq the default , -ZFS will use pool log devices -.Pq if configured -to handle the requests at low latency. -If -.Sy logbias -is set to -.Sy throughput , -ZFS will not use configured pool log devices. -ZFS will instead optimize synchronous operations for global pool throughput and -efficient use of resources. -.It Sy snapdev Ns = Ns Sy hidden Ns | Ns Sy visible -Controls whether the volume snapshot devices under -.Pa /dev/zvol/ Ns Aq Ar pool -are hidden or visible. -The default value is -.Sy hidden . 
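As a hedged example of the sharing properties above (dataset names hypothetical; the custom NFS option string is an assumption, not a documented default):

    # Share with the documented NFS defaults, or with explicit options:
    zfs set sharenfs=on tank/export
    zfs set sharenfs='rw,sec=sys,no_subtree_check' tank/media
    # SMB usershare; requires a running Samba with usershares configured:
    zfs set sharesmb=on tank/export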
-.It Sy snapdir Ns = Ns Sy hidden Ns | Ns Sy visible -Controls whether the -.Pa .zfs -directory is hidden or visible in the root of the file system as discussed in -the -.Sx Snapshots -section of -.Xr zfsconcepts 8 . -The default value is -.Sy hidden . -.It Sy sync Ns = Ns Sy standard Ns | Ns Sy always Ns | Ns Sy disabled -Controls the behavior of synchronous requests -.Pq e.g. fsync, O_DSYNC . -.Sy standard -is the POSIX-specified behavior of ensuring all synchronous requests -are written to stable storage and all devices are flushed to ensure -data is not cached by device controllers -.Pq this is the default . -.Sy always -causes every file system transaction to be written and flushed before its -system call returns. -This has a large performance penalty. -.Sy disabled -disables synchronous requests. -File system transactions are only committed to stable storage periodically. -This option will give the highest performance. -However, it is very dangerous as ZFS would be ignoring the synchronous -transaction demands of applications such as databases or NFS. -Administrators should only use this option when the risks are understood. -.It Sy version Ns = Ns Ar N Ns | Ns Sy current -The on-disk version of this file system, which is independent of the pool -version. -This property can only be set to later supported versions. -See the -.Nm zfs Cm upgrade -command. -.It Sy volsize Ns = Ns Ar size -For volumes, specifies the logical size of the volume. -By default, creating a volume establishes a reservation of equal size. -For storage pools with a version number of 9 or higher, a -.Sy refreservation -is set instead. -Any changes to -.Sy volsize -are reflected in an equivalent change to the reservation -.Pq or Sy refreservation . -The -.Sy volsize -can only be set to a multiple of -.Sy volblocksize , -and cannot be zero. -.Pp -The reservation is kept equal to the volume's logical size to prevent unexpected -behavior for consumers. -Without the reservation, the volume could run out of space, resulting in -undefined behavior or data corruption, depending on how the volume is used. -These effects can also occur when the volume size is changed while it is in use -.Pq particularly when shrinking the size . -Extreme care should be used when adjusting the volume size. -.Pp -Though not recommended, a -.Qq sparse volume -.Po also known as -.Qq thin provisioned -.Pc -can be created by specifying the -.Fl s -option to the -.Nm zfs Cm create Fl V -command, or by changing the value of the -.Sy refreservation -property -.Po or -.Sy reservation -property on pool version 8 or earlier -.Pc -after the volume has been created. -A -.Qq sparse volume -is a volume where the value of -.Sy refreservation -is less than the size of the volume plus the space required to store its -metadata. -Consequently, writes to a sparse volume can fail with -.Er ENOSPC -when the pool is low on space. -For a sparse volume, changes to -.Sy volsize -are not reflected in the -.Sy refreservation . -A volume that is not sparse is said to be -.Qq thick provisioned . -A sparse volume can become thick provisioned by setting -.Sy refreservation -to -.Sy auto . -.It Sy volmode Ns = Ns Sy default Ns | Ns Sy full Ns | Ns Sy geom Ns | Ns Sy dev Ns | Ns Sy none -This property specifies how volumes should be exposed to the OS. -Setting it to -.Sy full -exposes volumes as fully fledged block devices, providing maximal -functionality. -The value -.Sy geom -is just an alias for -.Sy full -and is kept for compatibility. 
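The sparse-volume workflow described under volsize above, sketched with a hypothetical pool:

    # -s creates a sparse ("thin provisioned") volume:
    zfs create -s -V 100G tank/vol
    # Convert it to thick provisioning later:
    zfs set refreservation=auto tank/vol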
-Setting it to -.Sy dev -hides its partitions. -Volumes with property set to -.Sy none -are not exposed outside ZFS, but can be snapshotted, cloned, replicated, etc, -that can be suitable for backup purposes. -Value -.Sy default -means that volumes exposition is controlled by system-wide tunable -.Sy zvol_volmode , -where -.Sy full , -.Sy dev -and -.Sy none -are encoded as 1, 2 and 3 respectively. -The default value is -.Sy full . -.It Sy vscan Ns = Ns Sy on Ns | Ns Sy off -Controls whether regular files should be scanned for viruses when a file is -opened and closed. -In addition to enabling this property, the virus scan service must also be -enabled for virus scanning to occur. -The default value is -.Sy off . -This property is not used by OpenZFS. -.It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy sa -Controls whether extended attributes are enabled for this file system. -Two styles of extended attributes are supported: either directory based -or system attribute based. -.Pp -The default value of -.Sy on -enables directory based extended attributes. -This style of extended attribute imposes no practical limit -on either the size or number of attributes which can be set on a file. -Although under Linux the -.Xr getxattr 2 -and -.Xr setxattr 2 -system calls limit the maximum size to 64K. -This is the most compatible -style of extended attribute and is supported by all ZFS implementations. -.Pp -System attribute based xattrs can be enabled by setting the value to -.Sy sa . -The key advantage of this type of xattr is improved performance. -Storing extended attributes as system attributes -significantly decreases the amount of disk IO required. -Up to 64K of data may be stored per-file in the space reserved for system attributes. -If there is not enough space available for an extended attribute -then it will be automatically written as a directory based xattr. -System attribute based extended attributes are not accessible -on platforms which do not support the -.Sy xattr Ns = Ns Sy sa -feature. -OpenZFS supports -.Sy xattr Ns = Ns Sy sa -on both -.Fx -and Linux. -.Pp -The use of system attribute based xattrs is strongly encouraged for users of -SELinux or POSIX ACLs. -Both of these features heavily rely on extended -attributes and benefit significantly from the reduced access time. -.Pp -The values -.Sy on -and -.Sy off -are equivalent to the -.Sy xattr -and -.Sy noxattr -mount options. -.It Sy jailed Ns = Ns Sy off Ns | Ns Sy on -Controls whether the dataset is managed from a jail. -See -.Xr zfs-jail 8 -for more information. -Jails are a -.Fx -feature and are not relevant on other platforms. -The default value is -.Sy off . -.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off -Controls whether the dataset is managed from a non-global zone. -Zones are a Solaris feature and are not relevant on other platforms. -The default value is -.Sy off . -.El -.Pp -The following three properties cannot be changed after the file system is -created, and therefore, should be set when the file system is created. -If the properties are not set with the -.Nm zfs Cm create -or -.Nm zpool Cm create -commands, these properties are inherited from the parent dataset. -If the parent dataset lacks these properties due to having been created prior to -these features being supported, the new file system will have the default values -for these properties. 
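Combining the xattr guidance above with the earlier dnodesize recommendation (dataset name hypothetical):

    # System-attribute xattrs for SELinux/POSIX-ACL-heavy workloads:
    zfs set xattr=sa tank/fs
    # Pairs well with dnodesize=auto when xattrs are large or numerous:
    zfs set dnodesize=auto tank/fs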
-.Bl -tag -width "" -.It Xo -.Sy casesensitivity Ns = Ns Sy sensitive Ns | Ns -.Sy insensitive Ns | Ns Sy mixed -.Xc -Indicates whether the file name matching algorithm used by the file system -should be case-sensitive, case-insensitive, or allow a combination of both -styles of matching. -The default value for the -.Sy casesensitivity -property is -.Sy sensitive . -Traditionally, -.Ux -and POSIX file systems have case-sensitive file names. -.Pp -The -.Sy mixed -value for the -.Sy casesensitivity -property indicates that the file system can support requests for both -case-sensitive and case-insensitive matching behavior. -Currently, case-insensitive matching behavior on a file system that supports -mixed behavior is limited to the SMB server product. -For more information about the -.Sy mixed -value behavior, see the "ZFS Administration Guide". -.It Xo -.Sy normalization Ns = Ns Sy none Ns | Ns Sy formC Ns | Ns -.Sy formD Ns | Ns Sy formKC Ns | Ns Sy formKD -.Xc -Indicates whether the file system should perform a -.Sy unicode -normalization of file names whenever two file names are compared, and which -normalization algorithm should be used. -File names are always stored unmodified, names are normalized as part of any -comparison process. -If this property is set to a legal value other than -.Sy none , -and the -.Sy utf8only -property was left unspecified, the -.Sy utf8only -property is automatically set to -.Sy on . -The default value of the -.Sy normalization -property is -.Sy none . -This property cannot be changed after the file system is created. -.It Sy utf8only Ns = Ns Sy on Ns | Ns Sy off -Indicates whether the file system should reject file names that include -characters that are not present in the -.Sy UTF-8 -character code set. -If this property is explicitly set to -.Sy off , -the normalization property must either not be explicitly set or be set to -.Sy none . -The default value for the -.Sy utf8only -property is -.Sy off . -This property cannot be changed after the file system is created. -.El -.Pp -The -.Sy casesensitivity , -.Sy normalization , -and -.Sy utf8only -properties are also new permissions that can be assigned to non-privileged users -by using the ZFS delegated administration feature. -. -.Ss Temporary Mount Point Properties -When a file system is mounted, either through -.Xr mount 8 -for legacy mounts or the -.Nm zfs Cm mount -command for normal file systems, its mount options are set according to its -properties. -The correlation between properties and mount options is as follows: -.Bl -tag -compact -offset Ds -width "rootcontext=" -.It Sy atime -atime/noatime -.It Sy canmount -auto/noauto -.It Sy devices -dev/nodev -.It Sy exec -exec/noexec -.It Sy readonly -ro/rw -.It Sy relatime -relatime/norelatime -.It Sy setuid -suid/nosuid -.It Sy xattr -xattr/noxattr -.It Sy nbmand -mand/nomand -.It Sy context Ns = -context= -.It Sy fscontext Ns = -fscontext= -.It Sy defcontext Ns = -defcontext= -.It Sy rootcontext Ns = -rootcontext= -.El -.Pp -In addition, these options can be set on a per-mount basis using the -.Fl o -option, without affecting the property that is stored on disk. -The values specified on the command line override the values stored in the -dataset. -The -.Sy nosuid -option is an alias for -.Sy nodevices , Ns Sy nosetuid . -These properties are reported as -.Qq temporary -by the -.Nm zfs Cm get -command. -If the properties are changed while the dataset is mounted, the new setting -overrides any temporary settings. -. 
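A short sketch tying together the creation-time-only properties and the temporary mount options above (dataset name hypothetical):

    # These three can only be chosen when the file system is created:
    zfs create -o casesensitivity=mixed -o normalization=formD \
        -o utf8only=on tank/smbshare
    # Per-mount overrides that leave the on-disk properties untouched:
    zfs mount -o ro,noatime tank/smbshare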
-.Ss User Properties -In addition to the standard native properties, ZFS supports arbitrary user -properties. -User properties have no effect on ZFS behavior, but applications or -administrators can use them to annotate datasets -.Pq file systems, volumes, and snapshots . -.Pp -User property names must contain a colon -.Pq Qq Sy \&: -character to distinguish them from native properties. -They may contain lowercase letters, numbers, and the following punctuation -characters: colon -.Pq Qq Sy \&: , -dash -.Pq Qq Sy - , -period -.Pq Qq Sy \&. , -and underscore -.Pq Qq Sy _ . -The expected convention is that the property name is divided into two portions -such as -.Ar module : Ns Ar property , -but this namespace is not enforced by ZFS. -User property names can be at most 256 characters, and cannot begin with a dash -.Pq Qq Sy - . -.Pp -When making programmatic use of user properties, it is strongly suggested to use -a reversed DNS domain name for the -.Ar module -component of property names to reduce the chance that two -independently-developed packages use the same property name for different -purposes. -.Pp -The values of user properties are arbitrary strings, are always inherited, and -are never validated. -All of the commands that operate on properties -.Po Nm zfs Cm list , -.Nm zfs Cm get , -.Nm zfs Cm set , -and so forth -.Pc -can be used to manipulate both native properties and user properties. -Use the -.Nm zfs Cm inherit -command to clear a user property. -If the property is not defined in any parent dataset, it is removed entirely. -Property values are limited to 8192 bytes. diff --git a/man/man8/zgenhostid.8 b/man/man8/zgenhostid.8 index 4f926f473b9c..3eff55b6d861 100644 --- a/man/man8/zgenhostid.8 +++ b/man/man8/zgenhostid.8 @@ -83,7 +83,7 @@ digits long, optionally prefixed by .Xr genhostid 1 , .Xr hostid 1 , .Xr sethostid 3 , -.Xr spl-module-parameters 5 +.Xr spl 4 .Sh HISTORY .Nm emulates the diff --git a/man/man8/zpool-add.8 b/man/man8/zpool-add.8 index a0f15076f230..26cf33c5538c 100644 --- a/man/man8/zpool-add.8 +++ b/man/man8/zpool-add.8 @@ -46,7 +46,7 @@ The specification is described in the .Em Virtual Devices section of -.Xr zpoolconcepts 8 . +.Xr zpoolconcepts 7 . The behavior of the .Fl f option, and the device checks performed are described in the @@ -87,7 +87,7 @@ flag. .It Fl o Ar property Ns = Ns Ar value Sets the given pool properties. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for a list of valid properties that can be set. The only property supported at the moment is .Sy ashift . diff --git a/man/man8/zpool-attach.8 b/man/man8/zpool-attach.8 index 04c0fca21d0d..19d8f6ac07ac 100644 --- a/man/man8/zpool-attach.8 +++ b/man/man8/zpool-attach.8 @@ -71,7 +71,7 @@ Not all devices can be overridden in this manner. .It Fl o Ar property Ns = Ns Ar value Sets the given pool properties. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for a list of valid properties that can be set. The only property supported at the moment is .Sy ashift . diff --git a/man/man8/zpool-create.8 b/man/man8/zpool-create.8 index 91e6f427d837..e902c770076d 100644 --- a/man/man8/zpool-create.8 +++ b/man/man8/zpool-create.8 @@ -80,7 +80,7 @@ The specification is described in the .Sx Virtual Devices section of -.Xr zpoolconcepts 8 . +.Xr zpoolconcepts 7 . .Pp The command attempts to verify that each device specified is accessible and not currently in use by another subsystem. 
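Circling back to the user-properties section above, a minimal sketch using the suggested reversed-DNS convention (the com.example module name is hypothetical):

    zfs set com.example:backup-policy=daily tank/data
    zfs get com.example:backup-policy tank/data
    # Clearing removes the property entirely if no parent defines it:
    zfs inherit com.example:backup-policy tank/data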
@@ -139,7 +139,7 @@ Individual features can be enabled by setting their corresponding properties to
 with
 .Fl o .
 See
-.Xr zpool-features 5
+.Xr zpool-features 7
 for details about feature properties.
 .It Fl f
 Forces use of
@@ -160,7 +160,7 @@ The mount point must be an absolute path,
 or
 .Sy none .
 For more information on dataset mount points, see
-.Xr zfsprops 8 .
+.Xr zfsprops 7 .
 .It Fl n
 Displays the configuration that would be used without actually creating the
 pool.
@@ -169,23 +169,23 @@ device sharing.
 .It Fl o Ar property Ns = Ns Ar value
 Sets the given pool properties.
 See
-.Xr zpoolprops 8
+.Xr zpoolprops 7
 for a list of valid properties that can be set.
 .It Fl o Ar compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns …
 Specifies compatibility feature sets.
 See
-.Xr zpool-features 5
+.Xr zpool-features 7
 for more information about compatibility feature sets.
 .It Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value
 Sets the given pool feature.
 See the
-.Xr zpool-features 5
+.Xr zpool-features 7
 section for a list of valid features that can be set.
 Value can be either disabled or enabled.
 .It Fl O Ar file-system-property Ns = Ns Ar value
 Sets the given file system properties in the root file system of the pool.
 See
-.Xr zfsprops 8
+.Xr zfsprops 7
 for a list of valid properties that can be set.
 .It Fl R Ar root
 Equivalent to
diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8
index a95ce48d93a8..ab1d6ea56213 100644
--- a/man/man8/zpool-events.8
+++ b/man/man8/zpool-events.8
@@ -49,10 +49,11 @@ These events are consumed by the
 and used to automate administrative tasks such as replacing a failed device
 with a hot spare.
 For more information about the subclasses and event payloads
-that can be generated see the
-.Xr zfs-events 5
-man page.
-.Pp
+that can be generated, see
+.Sx EVENTS
+and the following sections.
+.
+.Sh OPTIONS
 .Bl -tag -compact -width Ds
 .It Fl c
 Clear all previous events.
@@ -66,8 +67,417 @@ single tab instead of arbitrary space.
 Print the entire payload for each event.
 .El
 .
+.Sh EVENTS
+These are the different event subclasses.
+The full event name would be
+.Sy ereport.fs.zfs.\& Ns Em SUBCLASS ,
+but only the last part is listed here.
+.Pp
+.Bl -tag -compact -width "vdev.bad_guid_sum"
+.It Sy checksum
+Issued when a checksum error has been detected.
+.It Sy io
+Issued when there is an I/O error in a vdev in the pool.
+.It Sy data
+Issued when there have been data errors in the pool.
+.It Sy deadman
+Issued when an I/O request is determined to be "hung"; this can be caused
+by lost completion events due to flaky hardware or drivers.
+See
+.Sy zfs_deadman_failmode
+in
+.Xr zfs 4
+for additional information regarding "hung" I/O detection and configuration.
+.It Sy delay
+Issued when a completed I/O request exceeds the maximum allowed time
+specified by the
+.Sy zio_slow_io_ms
+module parameter.
+This can be an indicator of problems with the underlying storage device.
+The number of delay events is ratelimited by the
+.Sy zfs_slow_io_events_per_second
+module parameter.
+.It Sy config
+Issued every time a vdev change has been made to the pool.
+.It Sy zpool
+Issued when a pool cannot be imported.
+.It Sy zpool.destroy
+Issued when a pool is destroyed.
+.It Sy zpool.export
+Issued when a pool is exported.
+.It Sy zpool.import
+Issued when a pool is imported.
+.It Sy zpool.reguid
+Issued when a REGUID (a new unique identifier for the pool) has been regenerated.
+.It Sy vdev.unknown
+Issued when the vdev is unknown.
+Such as trying to clear device errors on a vdev that has failed/been kicked
+from the system/pool and is no longer available.
+.It Sy vdev.open_failed
+Issued when a vdev could not be opened (because it didn't exist, for example).
+.It Sy vdev.corrupt_data
+Issued when corrupt data has been detected on a vdev.
+.It Sy vdev.no_replicas
+Issued when there are no more replicas to sustain the pool.
+This would lead to the pool being
+.Em DEGRADED .
+.It Sy vdev.bad_guid_sum
+Issued when a missing device in the pool has been detected.
+.It Sy vdev.too_small
+Issued when the system (kernel) has removed a device, and ZFS
+notices that the device isn't there any more.
+This is usually followed by a
+.Sy probe_failure
+event.
+.It Sy vdev.bad_label
+Issued when the label is OK but invalid.
+.It Sy vdev.bad_ashift
+Issued when the ashift alignment requirement has increased.
+.It Sy vdev.remove
+Issued when a vdev is detached from a mirror (or a spare detached from a
+vdev where it has been used to replace a failed drive - only works if
+the original drive has been re-added).
+.It Sy vdev.clear
+Issued when clearing device errors in a pool.
+Such as running
+.Nm zpool Cm clear
+on a device in the pool.
+.It Sy vdev.check
+Issued when a check to see if a given vdev could be opened is started.
+.It Sy vdev.spare
+Issued when a spare has kicked in to replace a failed device.
+.It Sy vdev.autoexpand
+Issued when a vdev can be automatically expanded.
+.It Sy io_failure
+Issued when there is an I/O failure in a vdev in the pool.
+.It Sy probe_failure
+Issued when a probe fails on a vdev.
+This would occur if a vdev
+has been kicked from the system outside of ZFS (such as when the kernel
+has removed the device).
+.It Sy log_replay
+Issued when the intent log cannot be replayed.
+This can occur in the case of a missing or damaged log device.
+.It Sy resilver.start
+Issued when a resilver is started.
+.It Sy resilver.finish
+Issued when the running resilver has finished.
+.It Sy scrub.start
+Issued when a scrub is started on a pool.
+.It Sy scrub.finish
+Issued when a pool has finished scrubbing.
+.It Sy scrub.abort
+Issued when a scrub is aborted on a pool.
+.It Sy scrub.resume
+Issued when a scrub is resumed on a pool.
+.It Sy scrub.paused
+Issued when a scrub is paused on a pool.
+.It Sy bootfs.vdev.attach
+.El
+.
+.Sh PAYLOADS
+This is the payload (data, information) that accompanies an
+event.
+.Pp
+For
+.Xr zed 8 ,
+these are set to uppercase and prefixed with
+.Sy ZEVENT_ .
+.Pp
+.Bl -tag -compact -width "vdev_cksum_errors"
+.It Sy pool
+Pool name.
+.It Sy pool_failmode
+Failmode -
+.Sy wait ,
+.Sy continue ,
+or
+.Sy panic .
+See the
+.Sy failmode
+property in
+.Xr zpoolprops 7
+for more information.
+.It Sy pool_guid
+The GUID of the pool.
+.It Sy pool_context
+The load state for the pool (0=none, 1=open, 2=import, 3=tryimport, 4=recover,
+5=error).
+.It Sy vdev_guid
+The GUID of the vdev in question (the vdev failing or operated upon with
+.Nm zpool Cm clear ,
+etc.).
+.It Sy vdev_type
+Type of vdev -
+.Sy disk ,
+.Sy file ,
+.Sy mirror ,
+etc.
+See the
+.Sy Virtual Devices
+section of
+.Xr zpoolconcepts 7
+for more information on possible values.
+.It Sy vdev_path
+Full path of the vdev, including any
+.Em -partX .
+.It Sy vdev_devid
+ID of vdev (if any).
+.It Sy vdev_fru
+Physical FRU location.
+.It Sy vdev_state
+State of vdev (0=uninitialized, 1=closed, 2=offline, 3=removed, 4=failed to open, 5=faulted, 6=degraded, 7=healthy).
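As a hedged illustration of how these subclasses and payloads surface in practice (the commands are as documented above; output naturally depends on pool state):

    # Print all events with their full payloads:
    zpool events -v
    # Inside a zedlet the same payloads arrive uppercased with a
    # ZEVENT_ prefix, e.g. ZEVENT_SUBCLASS, ZEVENT_POOL, ZEVENT_VDEV_GUID.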
+.It Sy vdev_ashift
+The ashift value of the vdev.
+.It Sy vdev_complete_ts
+The time the last I/O request completed for the specified vdev.
+.It Sy vdev_delta_ts
+The time since the last I/O request completed for the specified vdev.
+.It Sy vdev_spare_paths
+List of spares, including full path and any
+.Em -partX .
+.It Sy vdev_spare_guids
+GUID(s) of spares.
+.It Sy vdev_read_errors
+How many read errors have been detected on the vdev.
+.It Sy vdev_write_errors
+How many write errors have been detected on the vdev.
+.It Sy vdev_cksum_errors
+How many checksum errors have been detected on the vdev.
+.It Sy parent_guid
+GUID of the vdev parent.
+.It Sy parent_type
+Type of parent.
+See
+.Sy vdev_type .
+.It Sy parent_path
+Path of the vdev parent (if any).
+.It Sy parent_devid
+ID of the vdev parent (if any).
+.It Sy zio_objset
+The object set number for a given I/O request.
+.It Sy zio_object
+The object number for a given I/O request.
+.It Sy zio_level
+The indirect level for the block.
+Level 0 is the lowest level and includes data blocks.
+Values > 0 indicate metadata blocks at the appropriate level.
+.It Sy zio_blkid
+The block ID for a given I/O request.
+.It Sy zio_err
+The error number for a failure when handling a given I/O request,
+compatible with
+.Xr errno 3
+with the value of
+.Sy EBADE
+used to indicate a ZFS checksum error.
+.It Sy zio_offset
+The offset in bytes of where to write the I/O request for the specified vdev.
+.It Sy zio_size
+The size in bytes of the I/O request.
+.It Sy zio_flags
+The current flags describing how the I/O request should be handled.
+See the
+.Sy I/O FLAGS
+section for the full list of I/O flags.
+.It Sy zio_stage
+The current stage of the I/O in the pipeline.
+See the
+.Sy I/O STAGES
+section for a full list of all the I/O stages.
+.It Sy zio_pipeline
+The valid pipeline stages for the I/O.
+See the
+.Sy I/O STAGES
+section for a full list of all the I/O stages.
+.It Sy zio_delay
+The time elapsed (in nanoseconds) waiting for the block layer to complete the
+I/O request.
+Unlike
+.Sy zio_delta ,
+this does not include any vdev queuing time and is
+therefore solely a measure of the block layer performance.
+.It Sy zio_timestamp
+The time when a given I/O request was submitted.
+.It Sy zio_delta
+The time required to service a given I/O request.
+.It Sy prev_state
+The previous state of the vdev.
+.It Sy cksum_expected
+The expected checksum value for the block.
+.It Sy cksum_actual
+The actual checksum value for an errant block.
+.It Sy cksum_algorithm
+Checksum algorithm used.
+See
+.Xr zfsprops 7
+for more information on the available checksum algorithms.
+.It Sy cksum_byteswap
+Whether or not the data is byteswapped.
+.It Sy bad_ranges
+.No [\& Ns Ar start , end )
+pairs of corruption offsets.
+Offsets are always aligned on a 64-bit boundary,
+and can include some gaps of non-corruption.
+(See
+.Sy bad_ranges_min_gap )
+.It Sy bad_ranges_min_gap
+In order to bound the size of the
+.Sy bad_ranges
+array, gaps of non-corruption
+less than or equal to
+.Sy bad_ranges_min_gap
+bytes have been merged with
+adjacent corruption.
+Always at least 8 bytes, since corruption is detected on a 64-bit word basis.
+.It Sy bad_range_sets
+This array has one element per range in
+.Sy bad_ranges .
+Each element contains
+the count of bits in that range which were clear in the good data and set
+in the bad data.
+.It Sy bad_range_clears
+This array has one element per range in
+.Sy bad_ranges .
+Each element contains
+the count of bits for that range which were set in the good data and clear in
+the bad data.
+.It Sy bad_set_bits
+If this field exists, it is an array of
+.Pq Ar bad data No & ~( Ns Ar good data ) ;
+that is, the bits set in the bad data which are cleared in the good data.
+Each element corresponds to a byte whose offset is in a range in
+.Sy bad_ranges ,
+and the array is ordered by offset.
+Thus, the first element is the first byte in the first
+.Sy bad_ranges
+range, and the last element is the last byte in the last
+.Sy bad_ranges
+range.
+.It Sy bad_cleared_bits
+Like
+.Sy bad_set_bits ,
+but contains
+.Pq Ar good data No & ~( Ns Ar bad data ) ;
+that is, the bits set in the good data which are cleared in the bad data.
+.It Sy bad_set_histogram
+If this field exists, it is an array of counters.
+Each entry counts bits set in a particular bit of a big-endian uint64 type.
+The first entry counts bits
+set in the high-order bit of the first byte, the 9th byte, etc, and the last
+entry counts bits set in the low-order bit of the 8th byte, the 16th byte, etc.
+This information is useful for observing a stuck bit in a parallel data path,
+such as IDE or parallel SCSI.
+.It Sy bad_cleared_histogram
+If this field exists, it is an array of counters.
+Each entry counts bit clears in a particular bit of a big-endian uint64 type.
+The first entry counts bit
+clears of the high-order bit of the first byte, the 9th byte, etc, and the
+last entry counts clears of the low-order bit of the 8th byte, the 16th byte, etc.
+This information is useful for observing a stuck bit in a parallel data
+path, such as IDE or parallel SCSI.
+.El
+.
+.Sh I/O STAGES
+The ZFS I/O pipeline is composed of various stages, which are defined below.
+The individual stages are used to construct these basic I/O
+operations: Read, Write, Free, Claim, and Ioctl.
+These stages may be
+set on an event to describe the life cycle of a given I/O request.
+.Pp
+.TS
+tab(:);
+l l l .
+Stage:Bit Mask:Operations
+_:_:_
+ZIO_STAGE_OPEN:0x00000001:RWFCI
+
+ZIO_STAGE_READ_BP_INIT:0x00000002:R----
+ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W---
+ZIO_STAGE_FREE_BP_INIT:0x00000008:--F--
+ZIO_STAGE_ISSUE_ASYNC:0x00000010:RWF--
+ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W---
+
+ZIO_STAGE_ENCRYPT:0x00000040:-W---
+ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W---
+
+ZIO_STAGE_NOP_WRITE:0x00000100:-W---
+
+ZIO_STAGE_DDT_READ_START:0x00000200:R----
+ZIO_STAGE_DDT_READ_DONE:0x00000400:R----
+ZIO_STAGE_DDT_WRITE:0x00000800:-W---
+ZIO_STAGE_DDT_FREE:0x00001000:--F--
+
+ZIO_STAGE_GANG_ASSEMBLE:0x00002000:RWFC-
+ZIO_STAGE_GANG_ISSUE:0x00004000:RWFC-
+
+ZIO_STAGE_DVA_THROTTLE:0x00008000:-W---
+ZIO_STAGE_DVA_ALLOCATE:0x00010000:-W---
+ZIO_STAGE_DVA_FREE:0x00020000:--F--
+ZIO_STAGE_DVA_CLAIM:0x00040000:---C-
+
+ZIO_STAGE_READY:0x00080000:RWFCI
+
+ZIO_STAGE_VDEV_IO_START:0x00100000:RW--I
+ZIO_STAGE_VDEV_IO_DONE:0x00200000:RW--I
+ZIO_STAGE_VDEV_IO_ASSESS:0x00400000:RW--I
+
+ZIO_STAGE_CHECKSUM_VERIFY:0x00800000:R----
+
+ZIO_STAGE_DONE:0x01000000:RWFCI
+.TE
+.
+.Sh I/O FLAGS
+Every I/O request in the pipeline contains a set of flags which describe its
+function and are used to govern its behavior.
+These flags will be set in an event as a
+.Sy zio_flags
+payload entry.
+.Pp
+.TS
+tab(:);
+l l .
+Flag:Bit Mask +_:_ +ZIO_FLAG_DONT_AGGREGATE:0x00000001 +ZIO_FLAG_IO_REPAIR:0x00000002 +ZIO_FLAG_SELF_HEAL:0x00000004 +ZIO_FLAG_RESILVER:0x00000008 +ZIO_FLAG_SCRUB:0x00000010 +ZIO_FLAG_SCAN_THREAD:0x00000020 +ZIO_FLAG_PHYSICAL:0x00000040 + +ZIO_FLAG_CANFAIL:0x00000080 +ZIO_FLAG_SPECULATIVE:0x00000100 +ZIO_FLAG_CONFIG_WRITER:0x00000200 +ZIO_FLAG_DONT_RETRY:0x00000400 +ZIO_FLAG_DONT_CACHE:0x00000800 +ZIO_FLAG_NODATA:0x00001000 +ZIO_FLAG_INDUCE_DAMAGE:0x00002000 + +ZIO_FLAG_IO_ALLOCATING:0x00004000 +ZIO_FLAG_IO_RETRY:0x00008000 +ZIO_FLAG_PROBE:0x00010000 +ZIO_FLAG_TRYHARD:0x00020000 +ZIO_FLAG_OPTIONAL:0x00040000 + +ZIO_FLAG_DONT_QUEUE:0x00080000 +ZIO_FLAG_DONT_PROPAGATE:0x00100000 +ZIO_FLAG_IO_BYPASS:0x00200000 +ZIO_FLAG_IO_REWRITE:0x00400000 +ZIO_FLAG_RAW_COMPRESS:0x00800000 +ZIO_FLAG_RAW_ENCRYPT:0x01000000 + +ZIO_FLAG_GANG_CHILD:0x02000000 +ZIO_FLAG_DDT_CHILD:0x04000000 +ZIO_FLAG_GODFATHER:0x08000000 +ZIO_FLAG_NOPWRITE:0x10000000 +ZIO_FLAG_REEXECUTED:0x20000000 +ZIO_FLAG_DELEGATED:0x40000000 +ZIO_FLAG_FASTWRITE:0x80000000 +.TE +. .Sh SEE ALSO -.Xr zfs-events 5 , -.Xr zfs-module-parameters 5 , +.Xr zfs 4 , .Xr zed 8 , .Xr zpool-wait 8 diff --git a/man/man8/zpool-get.8 b/man/man8/zpool-get.8 index 06908238999c..55904f169e24 100644 --- a/man/man8/zpool-get.8 +++ b/man/man8/zpool-get.8 @@ -76,7 +76,7 @@ Property source, either .El .Pp See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for more information on the available pool properties. .Bl -tag -compact -offset Ds -width "-o field" .It Fl H @@ -97,12 +97,12 @@ Display numbers in parsable (exact) values. .Xc Sets the given property on the specified pool. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for more information on what properties can be set and acceptable values. .El . .Sh SEE ALSO -.Xr zpool-features 5 , -.Xr zpool-list 8 , -.Xr zpoolprops 8 +.Xr zpool-features 7 , +.Xr zpoolprops 7 , +.Xr zpool-list 8 diff --git a/man/man8/zpool-import.8 b/man/man8/zpool-import.8 index 1b1f3c5ae5b0..518e3cf1d76a 100644 --- a/man/man8/zpool-import.8 +++ b/man/man8/zpool-import.8 @@ -201,7 +201,7 @@ for a description of dataset properties and mount options. .It Fl o Ar property Ns = Ns Ar value Sets the specified property on the imported pool. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for more information on the available pool properties. .It Fl R Ar root Sets the @@ -347,7 +347,7 @@ for a description of dataset properties and mount options. .It Fl o Ar property Ns = Ns Ar value Sets the specified property on the imported pool. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for more information on the available pool properties. .It Fl R Ar root Sets the diff --git a/man/man8/zpool-list.8 b/man/man8/zpool-list.8 index 3dec7370c5e8..dd4e13c16042 100644 --- a/man/man8/zpool-list.8 +++ b/man/man8/zpool-list.8 @@ -69,7 +69,7 @@ space. .It Fl o Ar property Comma-separated list of properties to display. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for a list of valid properties. The default list is .Sy name , size , allocated , free , checkpoint, expandsize , fragmentation , diff --git a/man/man8/zpool-remove.8 b/man/man8/zpool-remove.8 index 5d866cb50d4d..1429180385cc 100644 --- a/man/man8/zpool-remove.8 +++ b/man/man8/zpool-remove.8 @@ -70,7 +70,7 @@ If an IO error is encountered during the removal process it will be cancelled. The .Sy device_removal feature flag must be enabled to remove a top-level vdev, see -.Xr zpool-features 5 . +.Xr zpool-features 7 . 
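A minimal sh sketch for decoding a zio_flags payload value against the flag table above (the sample value is hypothetical):

    flags=0x80090   # hypothetical: CANFAIL | DONT_QUEUE | SCRUB
    if [ $(( flags & 0x10 )) -ne 0 ]; then  # ZIO_FLAG_SCRUB
        echo "this I/O was issued by a scrub"
    fi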
.Pp A mirrored top-level device (log or data) can be removed by specifying the top-level mirror for the same. diff --git a/man/man8/zpool-replace.8 b/man/man8/zpool-replace.8 index eadb5681895b..2b2875ed4292 100644 --- a/man/man8/zpool-replace.8 +++ b/man/man8/zpool-replace.8 @@ -77,7 +77,7 @@ Not all devices can be overridden in this manner. .It Fl o Ar property Ns = Ns Ar value Sets the given pool properties. See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for a list of valid properties that can be set. The only property supported at the moment is .Sy ashift . diff --git a/man/man8/zpool-split.8 b/man/man8/zpool-split.8 index 7a1a13d5db41..c3b05c2366bf 100644 --- a/man/man8/zpool-split.8 +++ b/man/man8/zpool-split.8 @@ -98,7 +98,7 @@ flag. Sets the specified property for .Ar newpool . See the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page for more information on the available pool properties. .It Fl R Ar root Set diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index da5f95e29cdc..7c825f69d8e2 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -50,7 +50,7 @@ is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the .Sx Device Failure and Recovery section of -.Xr zpoolconcepts 8 . +.Xr zpoolconcepts 7 . .Pp If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. diff --git a/man/man8/zpool-sync.8 b/man/man8/zpool-sync.8 index 6d4aa2c29c48..aa68a5729e9f 100644 --- a/man/man8/zpool-sync.8 +++ b/man/man8/zpool-sync.8 @@ -48,6 +48,6 @@ will sync all pools on the system. Otherwise, it will sync only the specified pools. . .Sh SEE ALSO +.Xr zpoolconcepts 7 , .Xr zpool-export 8 , -.Xr zpool-iostat 8 , -.Xr zpoolconcepts 8 +.Xr zpool-iostat 8 diff --git a/man/man8/zpool-trim.8 b/man/man8/zpool-trim.8 index f709dd85414c..d9a7b4400301 100644 --- a/man/man8/zpool-trim.8 +++ b/man/man8/zpool-trim.8 @@ -86,6 +86,6 @@ Wait until the devices are done being trimmed before returning. .El . .Sh SEE ALSO +.Xr zpoolprops 7 , .Xr zpool-initialize 8 , -.Xr zpool-wait 8 , -.Xr zpoolprops 8 +.Xr zpool-wait 8 diff --git a/man/man8/zpool-upgrade.8 b/man/man8/zpool-upgrade.8 index 0e67e7884c72..1b13bad898bf 100644 --- a/man/man8/zpool-upgrade.8 +++ b/man/man8/zpool-upgrade.8 @@ -66,7 +66,7 @@ property). .Xc Displays legacy ZFS versions supported by the this version of ZFS. See -.Xr zpool-features 5 +.Xr zpool-features 7 for a description of feature flags features supported by this version of ZFS. .It Xo .Nm zpool @@ -87,7 +87,7 @@ then no upgrade will take place. Once this is done, the pool will no longer be accessible on systems that do not support feature flags. See -.Xr zpool-features 5 +.Xr zpool-features 7 for details on compatibility with systems that support feature flags, but do not support all features enabled on the pool. .Bl -tag -width Ds @@ -103,7 +103,7 @@ supported legacy version number. .El . .Sh SEE ALSO -.Xr zpool-features 5 , -.Xr zpool-history 8 , -.Xr zpoolconcepts 8 , -.Xr zpoolprops 8 +.Xr zpool-features 7 , +.Xr zpoolconcepts 7 , +.Xr zpoolprops 7 , +.Xr zpool-history 8 diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index dac35eee77b9..192a8e2eac8d 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -54,7 +54,7 @@ See for information on managing datasets. .Pp For an overview of creating and managing ZFS storage pools see the -.Xr zpoolconcepts 8 +.Xr zpoolconcepts 7 manual page. . 
.Sh SUBCOMMANDS @@ -126,7 +126,7 @@ Creates a new pool by splitting all mirrors in an existing pool (which decreases . .Ss Properties Available pool properties listed in the -.Xr zpoolprops 8 +.Xr zpoolprops 7 manual page. .Bl -tag -width Ds .It Xr zpool-list 8 @@ -157,10 +157,8 @@ These events are consumed by the .Xr zed 8 and used to automate administrative tasks such as replacing a failed device with a hot spare. -For more information about the subclasses and event payloads -that can be generated see the -.Xr zfs-events 5 -man page. +That manual page also describes the subclasses and event payloads +that can be generated. .It Xr zpool-history 8 Displays the command history of the specified pool(s) or all pools if no pool is specified. @@ -523,9 +521,10 @@ is not set, it is assumed that the user is allowed to run .Sy Evolving . .Sh SEE ALSO -.Xr zfs-events 5 , -.Xr zfs-module-parameters 5 , -.Xr zpool-features 5 , +.Xr zfs 4 , +.Xr zpool-features 7 , +.Xr zpoolconcepts 7 , +.Xr zpoolprops 7 , .Xr zed 8 , .Xr zfs 8 , .Xr zpool-add 8 , @@ -558,6 +557,4 @@ is not set, it is assumed that the user is allowed to run .Xr zpool-sync 8 , .Xr zpool-trim 8 , .Xr zpool-upgrade 8 , -.Xr zpool-wait 8 , -.Xr zpoolconcepts 8 , -.Xr zpoolprops 8 +.Xr zpool-wait 8 diff --git a/man/man8/zpoolconcepts.8 b/man/man8/zpoolconcepts.8 deleted file mode 100644 index 80a1885fb1cb..000000000000 --- a/man/man8/zpoolconcepts.8 +++ /dev/null @@ -1,512 +0,0 @@ -.\" -.\" CDDL HEADER START -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" CDDL HEADER END -.\" -.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. -.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. -.\" Copyright (c) 2017 Datto Inc. -.\" Copyright (c) 2018 George Melikov. All Rights Reserved. -.\" Copyright 2017 Nexenta Systems, Inc. -.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. -.\" -.Dd June 2, 2021 -.Dt ZPOOLCONCEPTS 8 -.Os -. -.Sh NAME -.Nm zpoolconcepts -.Nd overview of ZFS storage pools -. -.Sh DESCRIPTION -.Ss Virtual Devices (vdevs) -A "virtual device" describes a single device or a collection of devices -organized according to certain performance and fault characteristics. -The following virtual devices are supported: -.Bl -tag -width "special" -.It Sy disk -A block device, typically located under -.Pa /dev . -ZFS can use individual slices or partitions, though the recommended mode of -operation is to use whole disks. -A disk can be specified by a full path, or it can be a shorthand name -.Po the relative portion of the path under -.Pa /dev -.Pc . -A whole disk can be specified by omitting the slice or partition designation. -For example, -.Pa sda -is equivalent to -.Pa /dev/sda . 
-When given a whole disk, ZFS automatically labels the disk, if necessary. -.It Sy file -A regular file. -The use of files as a backing store is strongly discouraged. -It is designed primarily for experimental purposes, as the fault tolerance of a -file is only as good as the file system on which it resides. -A file must be specified by a full path. -.It Sy mirror -A mirror of two or more devices. -Data is replicated in an identical fashion across all components of a mirror. -A mirror with -.Em N No disks of size Em X No can hold Em X No bytes and can withstand Em N-1 -devices failing without losing data. -.It Sy raidz , raidz1 , raidz2 , raidz3 -A variation on RAID-5 that allows for better distribution of parity and -eliminates the RAID-5 -.Qq write hole -.Pq in which data and parity become inconsistent after a power loss . -Data and parity is striped across all disks within a raidz group. -.Pp -A raidz group can have single, double, or triple parity, meaning that the -raidz group can sustain one, two, or three failures, respectively, without -losing any data. -The -.Sy raidz1 -vdev type specifies a single-parity raidz group; the -.Sy raidz2 -vdev type specifies a double-parity raidz group; and the -.Sy raidz3 -vdev type specifies a triple-parity raidz group. -The -.Sy raidz -vdev type is an alias for -.Sy raidz1 . -.Pp -A raidz group with -.Em N No disks of size Em X No with Em P No parity disks can hold approximately -.Em (N-P)*X No bytes and can withstand Em P No devices failing without losing data. -The minimum number of devices in a raidz group is one more than the number of -parity disks. -The recommended number is between 3 and 9 to help increase performance. -.It Sy draid , draid1 , draid2 , draid3 -A variant of raidz that provides integrated distributed hot spares which -allows for faster resilvering while retaining the benefits of raidz. -A dRAID vdev is constructed from multiple internal raidz groups, each with -.Em D No data devices and Em P No parity devices. -These groups are distributed over all of the children in order to fully -utilize the available disk performance. -.Pp -Unlike raidz, dRAID uses a fixed stripe width (padding as necessary with -zeros) to allow fully sequential resilvering. -This fixed stripe width significantly effects both usable capacity and IOPS. -For example, with the default -.Em D=8 No and Em 4kB No disk sectors the minimum allocation size is Em 32kB . -If using compression, this relatively large allocation size can reduce the -effective compression ratio. -When using ZFS volumes and dRAID, the default of the -.Sy volblocksize -property is increased to account for the allocation size. -If a dRAID pool will hold a significant amount of small blocks, it is -recommended to also add a mirrored -.Sy special -vdev to store those blocks. -.Pp -In regards to I/O, performance is similar to raidz since for any read all -.Em D No data disks must be accessed. -Delivered random IOPS can be reasonably approximated as -.Sy floor((N-S)/(D+P))*single_drive_IOPS . -.Pp -Like raidzm a dRAID can have single-, double-, or triple-parity. -The -.Sy draid1 , -.Sy draid2 , -and -.Sy draid3 -types can be used to specify the parity level. -The -.Sy draid -vdev type is an alias for -.Sy draid1 . -.Pp -A dRAID with -.Em N No disks of size Em X , D No data disks per redundancy group, Em P -.No parity level, and Em S No distributed hot spares can hold approximately -.Em (N-S)*(D/(D+P))*X No bytes and can withstand Em P -devices failing without losing data. 
-.It Sy draid Ns Oo Ar parity Oc Ns Oo Sy \&: Ns Ar data Ns Sy d Oc Ns Oo Sy \&: Ns Ar children Ns Sy c Oc Ns Oo Sy \&: Ns Ar spares Ns Sy s Oc -A non-default dRAID configuration can be specified by appending one or more -of the following optional arguments to the -.Sy draid -keyword: -.Bl -tag -compact -width "children" -.It Ar parity -The parity level (1-3). -.It Ar data -The number of data devices per redundancy group. -In general, a smaller value of -.Em D No will increase IOPS, improve the compression ratio, -and speed up resilvering at the expense of total usable capacity. -Defaults to -.Em 8 , No unless Em N-P-S No is less than Em 8 . -.It Ar children -The expected number of children. -Useful as a cross-check when listing a large number of devices. -An error is returned when the provided number of children differs. -.It Ar spares -The number of distributed hot spares. -Defaults to zero. -.El -.It Sy spare -A pseudo-vdev which keeps track of available hot spares for a pool. -For more information, see the -.Sx Hot Spares -section. -.It Sy log -A separate intent log device. -If more than one log device is specified, then writes are load-balanced between -devices. -Log devices can be mirrored. -However, raidz vdev types are not supported for the intent log. -For more information, see the -.Sx Intent Log -section. -.It Sy dedup -A device dedicated solely for deduplication tables. -The redundancy of this device should match the redundancy of the other normal -devices in the pool. -If more than one dedup device is specified, then -allocations are load-balanced between those devices. -.It Sy special -A device dedicated solely for allocating various kinds of internal metadata, -and optionally small file blocks. -The redundancy of this device should match the redundancy of the other normal -devices in the pool. -If more than one special device is specified, then -allocations are load-balanced between those devices. -.Pp -For more information on special allocations, see the -.Sx Special Allocation Class -section. -.It Sy cache -A device used to cache storage pool data. -A cache device cannot be configured as a mirror or raidz group. -For more information, see the -.Sx Cache Devices -section. -.El -.Pp -Virtual devices cannot be nested, so a mirror or raidz virtual device can only -contain files or disks. -Mirrors of mirrors -.Pq or other combinations -are not allowed. -.Pp -A pool can have any number of virtual devices at the top of the configuration -.Po known as -.Qq root vdevs -.Pc . -Data is dynamically distributed across all top-level devices to balance data -among devices. -As new virtual devices are added, ZFS automatically places data on the newly -available devices. -.Pp -Virtual devices are specified one at a time on the command line, -separated by whitespace. -Keywords like -.Sy mirror No and Sy raidz -are used to distinguish where a group ends and another begins. -For example, the following creates a pool with two root vdevs, -each a mirror of two disks: -.Dl # Nm zpool Cm create Ar mypool Sy mirror Ar sda sdb Sy mirror Ar sdc sdd -. -.Ss Device Failure and Recovery -ZFS supports a rich set of mechanisms for handling device failure and data -corruption. -All metadata and data is checksummed, and ZFS automatically repairs bad data -from a good copy when corruption is detected. -.Pp -In order to take advantage of these features, a pool must make use of some form -of redundancy, using either mirrored or raidz groups. 
-While ZFS supports running in a non-redundant configuration, where each root -vdev is simply a disk or file, this is strongly discouraged. -A single case of bit corruption can render some or all of your data unavailable. -.Pp -A pool's health status is described by one of three states: -.Sy online , degraded , No or Sy faulted . -An online pool has all devices operating normally. -A degraded pool is one in which one or more devices have failed, but the data is -still available due to a redundant configuration. -A faulted pool has corrupted metadata, or one or more faulted devices, and -insufficient replicas to continue functioning. -.Pp -The health of the top-level vdev, such as a mirror or raidz device, -is potentially impacted by the state of its associated vdevs, -or component devices. -A top-level vdev or component device is in one of the following states: -.Bl -tag -width "DEGRADED" -.It Sy DEGRADED -One or more top-level vdevs is in the degraded state because one or more -component devices are offline. -Sufficient replicas exist to continue functioning. -.Pp -One or more component devices is in the degraded or faulted state, but -sufficient replicas exist to continue functioning. -The underlying conditions are as follows: -.Bl -bullet -compact -.It -The number of checksum errors exceeds acceptable levels and the device is -degraded as an indication that something may be wrong. -ZFS continues to use the device as necessary. -.It -The number of I/O errors exceeds acceptable levels. -The device could not be marked as faulted because there are insufficient -replicas to continue functioning. -.El -.It Sy FAULTED -One or more top-level vdevs is in the faulted state because one or more -component devices are offline. -Insufficient replicas exist to continue functioning. -.Pp -One or more component devices is in the faulted state, and insufficient -replicas exist to continue functioning. -The underlying conditions are as follows: -.Bl -bullet -compact -.It -The device could be opened, but the contents did not match expected values. -.It -The number of I/O errors exceeds acceptable levels and the device is faulted to -prevent further use of the device. -.El -.It Sy OFFLINE -The device was explicitly taken offline by the -.Nm zpool Cm offline -command. -.It Sy ONLINE -The device is online and functioning. -.It Sy REMOVED -The device was physically removed while the system was running. -Device removal detection is hardware-dependent and may not be supported on all -platforms. -.It Sy UNAVAIL -The device could not be opened. -If a pool is imported when a device was unavailable, then the device will be -identified by a unique identifier instead of its path since the path was never -correct in the first place. -.El -.Pp -Checksum errors represent events where a disk returned data that was expected -to be correct, but was not. -In other words, these are instances of silent data corruption. -The checksum errors are reported in -.Nm zpool Cm status -and -.Nm zpool Cm events . -When a block is stored redundantly, a damaged block may be reconstructed -(e.g. from raidz parity or a mirrored copy). -In this case, ZFS reports the checksum error against the disks that contained -damaged data. -If a block is unable to be reconstructed (e.g. due to 3 disks being damaged -in a raidz2 group), it is not possible to determine which disks were silently -corrupted. -In this case, checksum errors are reported for all disks on which the block -is stored. 
-.Pp -If a device is removed and later re-attached to the system, -ZFS attempts online the device automatically. -Device attachment detection is hardware-dependent -and might not be supported on all platforms. -. -.Ss Hot Spares -ZFS allows devices to be associated with pools as -.Qq hot spares . -These devices are not actively used in the pool, but when an active device -fails, it is automatically replaced by a hot spare. -To create a pool with hot spares, specify a -.Sy spare -vdev with any number of devices. -For example, -.Dl # Nm zpool Cm create Ar pool Sy mirror Ar sda sdb Sy spare Ar sdc sdd -.Pp -Spares can be shared across multiple pools, and can be added with the -.Nm zpool Cm add -command and removed with the -.Nm zpool Cm remove -command. -Once a spare replacement is initiated, a new -.Sy spare -vdev is created within the configuration that will remain there until the -original device is replaced. -At this point, the hot spare becomes available again if another device fails. -.Pp -If a pool has a shared spare that is currently being used, the pool can not be -exported since other pools may use this shared spare, which may lead to -potential data corruption. -.Pp -Shared spares add some risk. -If the pools are imported on different hosts, -and both pools suffer a device failure at the same time, -both could attempt to use the spare at the same time. -This may not be detected, resulting in data corruption. -.Pp -An in-progress spare replacement can be cancelled by detaching the hot spare. -If the original faulted device is detached, then the hot spare assumes its -place in the configuration, and is removed from the spare list of all active -pools. -.Pp -The -.Sy draid -vdev type provides distributed hot spares. -These hot spares are named after the dRAID vdev they're a part of -.Po Sy draid1 Ns - Ns Ar 2 Ns - Ns Ar 3 No specifies spare Ar 3 No of vdev Ar 2 , -.No which is a single parity dRAID Pc -and may only be used by that dRAID vdev. -Otherwise, they behave the same as normal hot spares. -.Pp -Spares cannot replace log devices. -. -.Ss Intent Log -The ZFS Intent Log (ZIL) satisfies POSIX requirements for synchronous -transactions. -For instance, databases often require their transactions to be on stable storage -devices when returning from a system call. -NFS and other applications can also use -.Xr fsync 2 -to ensure data stability. -By default, the intent log is allocated from blocks within the main pool. -However, it might be possible to get better performance using separate intent -log devices such as NVRAM or a dedicated disk. -For example: -.Dl # Nm zpool Cm create Ar pool sda sdb Sy log Ar sdc -.Pp -Multiple log devices can also be specified, and they can be mirrored. -See the -.Sx EXAMPLES -section for an example of mirroring multiple log devices. -.Pp -Log devices can be added, replaced, attached, detached and removed. -In addition, log devices are imported and exported as part of the pool -that contains them. -Mirrored devices can be removed by specifying the top-level mirror vdev. -. -.Ss Cache Devices -Devices can be added to a storage pool as -.Qq cache devices . -These devices provide an additional layer of caching between main memory and -disk. -For read-heavy workloads, where the working set size is much larger than what -can be cached in main memory, using cache devices allows much more of this -working set to be served from low latency media. 
-Using cache devices provides the greatest performance improvement for random -read-workloads of mostly static content. -.Pp -To create a pool with cache devices, specify a -.Sy cache -vdev with any number of devices. -For example: -.Dl # Nm zpool Cm create Ar pool sda sdb Sy cache Ar sdc sdd -.Pp -Cache devices cannot be mirrored or part of a raidz configuration. -If a read error is encountered on a cache device, that read I/O is reissued to -the original storage pool device, which might be part of a mirrored or raidz -configuration. -.Pp -The content of the cache devices is persistent across reboots and restored -asynchronously when importing the pool in L2ARC (persistent L2ARC). -This can be disabled by setting -.Sy l2arc_rebuild_enabled Ns = Ns Sy 0 . -For cache devices smaller than -.Em 1GB , -we do not write the metadata structures -required for rebuilding the L2ARC in order not to waste space. -This can be changed with -.Sy l2arc_rebuild_blocks_min_l2size . -The cache device header -.Pq Em 512B -is updated even if no metadata structures are written. -Setting -.Sy l2arc_headroom Ns = Ns Sy 0 -will result in scanning the full-length ARC lists for cacheable content to be -written in L2ARC (persistent ARC). -If a cache device is added with -.Nm zpool Cm add -its label and header will be overwritten and its contents are not going to be -restored in L2ARC, even if the device was previously part of the pool. -If a cache device is onlined with -.Nm zpool Cm online -its contents will be restored in L2ARC. -This is useful in case of memory pressure -where the contents of the cache device are not fully restored in L2ARC. -The user can off- and online the cache device when there is less memory pressure -in order to fully restore its contents to L2ARC. -. -.Ss Pool checkpoint -Before starting critical procedures that include destructive actions -.Pq like Nm zfs Cm destroy , -an administrator can checkpoint the pool's state and in the case of a -mistake or failure, rewind the entire pool back to the checkpoint. -Otherwise, the checkpoint can be discarded when the procedure has completed -successfully. -.Pp -A pool checkpoint can be thought of as a pool-wide snapshot and should be used -with care as it contains every part of the pool's state, from properties to vdev -configuration. -Thus, certain operations are not allowed while a pool has a checkpoint. -Specifically, vdev removal/attach/detach, mirror splitting, and -changing the pool's GUID. -Adding a new vdev is supported, but in the case of a rewind it will have to be -added again. -Finally, users of this feature should keep in mind that scrubs in a pool that -has a checkpoint do not repair checkpointed data. -.Pp -To create a checkpoint for a pool: -.Dl # Nm zpool Cm checkpoint Ar pool -.Pp -To later rewind to its checkpointed state, you need to first export it and -then rewind it during import: -.Dl # Nm zpool Cm export Ar pool -.Dl # Nm zpool Cm import Fl -rewind-to-checkpoint Ar pool -.Pp -To discard the checkpoint from a pool: -.Dl # Nm zpool Cm checkpoint Fl d Ar pool -.Pp -Dataset reservations (controlled by the -.Sy reservation No and Sy refreservation -properties) may be unenforceable while a checkpoint exists, because the -checkpoint is allowed to consume the dataset's reservation. -Finally, data that is part of the checkpoint but has been freed in the -current state of the pool won't be scanned during a scrub. -. -.Ss Special Allocation Class -Allocations in the special class are dedicated to specific block types. 
-By default this includes all metadata, the indirect blocks of user data, and -any deduplication tables. -The class can also be provisioned to accept small file blocks. -.Pp -A pool must always have at least one normal -.Pq non- Ns Sy dedup Ns /- Ns Sy special -vdev before -other devices can be assigned to the special class. -If the -.Sy special -class becomes full, then allocations intended for it -will spill back into the normal class. -.Pp -Deduplication tables can be excluded from the special class by unsetting the -.Sy zfs_ddt_data_is_special -ZFS module parameter. -.Pp -Inclusion of small file blocks in the special class is opt-in. -Each dataset can control the size of small file blocks allowed -in the special class by setting the -.Sy special_small_blocks -property to nonzero. -See -.Xr zfsprops 8 -for more info on this property. diff --git a/man/man8/zpoolprops.8 b/man/man8/zpoolprops.8 deleted file mode 100644 index 050a0507288e..000000000000 --- a/man/man8/zpoolprops.8 +++ /dev/null @@ -1,412 +0,0 @@ -.\" -.\" CDDL HEADER START -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" CDDL HEADER END -.\" -.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. -.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. -.\" Copyright (c) 2017 Datto Inc. -.\" Copyright (c) 2018 George Melikov. All Rights Reserved. -.\" Copyright 2017 Nexenta Systems, Inc. -.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. -.\" Copyright (c) 2021, Colm Buckley -.\" -.Dd May 27, 2021 -.Dt ZPOOLPROPS 8 -.Os -. -.Sh NAME -.Nm zpoolprops -.Nd properties of ZFS storage pools -. -.Sh DESCRIPTION -Each pool has several properties associated with it. -Some properties are read-only statistics while others are configurable and -change the behavior of the pool. -.Pp -The following are read-only properties: -.Bl -tag -width "unsupported@guid" -.It Cm allocated -Amount of storage used within the pool. -See -.Sy fragmentation -and -.Sy free -for more information. -.It Sy capacity -Percentage of pool space used. -This property can also be referred to by its shortened column name, -.Sy cap . -.It Sy expandsize -Amount of uninitialized space within the pool or device that can be used to -increase the total capacity of the pool. -On whole-disk vdevs, this is the space beyond the end of the GPT – -typically occurring when a LUN is dynamically expanded -or a disk replaced with a larger one. -On partition vdevs, this is the space appended to the partition after it was -added to the pool – most likely by resizing it in-place. -The space can be claimed for the pool by bringing it online with -.Sy autoexpand=on -or using -.Nm zpool Cm online Fl e . 
-.It Sy fragmentation -The amount of fragmentation in the pool. -As the amount of space -.Sy allocated -increases, it becomes more difficult to locate -.Sy free -space. -This may result in lower write performance compared to pools with more -unfragmented free space. -.It Sy free -The amount of free space available in the pool. -By contrast, the -.Xr zfs 8 -.Sy available -property describes how much new data can be written to ZFS filesystems/volumes. -The zpool -.Sy free -property is not generally useful for this purpose, and can be substantially more than the zfs -.Sy available -space. -This discrepancy is due to several factors, including raidz parity; -zfs reservation, quota, refreservation, and refquota properties; and space set aside by -.Sy spa_slop_shift -(see -.Xr zfs-module-parameters 5 -for more information). -.It Sy freeing -After a file system or snapshot is destroyed, the space it was using is -returned to the pool asynchronously. -.Sy freeing -is the amount of space remaining to be reclaimed. -Over time -.Sy freeing -will decrease while -.Sy free -increases. -.It Sy health -The current health of the pool. -Health can be one of -.Sy ONLINE , DEGRADED , FAULTED , OFFLINE, REMOVED , UNAVAIL . -.It Sy guid -A unique identifier for the pool. -.It Sy load_guid -A unique identifier for the pool. -Unlike the -.Sy guid -property, this identifier is generated every time we load the pool (i.e. does -not persist across imports/exports) and never changes while the pool is loaded -(even if a -.Sy reguid -operation takes place). -.It Sy size -Total size of the storage pool. -.It Sy unsupported@ Ns Em guid -Information about unsupported features that are enabled on the pool. -See -.Xr zpool-features 5 -for details. -.El -.Pp -The space usage properties report actual physical space available to the -storage pool. -The physical space can be different from the total amount of space that any -contained datasets can actually use. -The amount of space used in a raidz configuration depends on the characteristics -of the data being written. -In addition, ZFS reserves some space for internal accounting that the -.Xr zfs 8 -command takes into account, but the -.Nm -command does not. -For non-full pools of a reasonable size, these effects should be invisible. -For small pools, or pools that are close to being completely full, these -discrepancies may become more noticeable. -.Pp -The following property can be set at creation time and import time: -.Bl -tag -width Ds -.It Sy altroot -Alternate root directory. -If set, this directory is prepended to any mount points within the pool. -This can be used when examining an unknown pool where the mount points cannot be -trusted, or in an alternate boot environment, where the typical paths are not -valid. -.Sy altroot -is not a persistent property. -It is valid only while the system is up. -Setting -.Sy altroot -defaults to using -.Sy cachefile Ns = Ns Sy none , -though this may be overridden using an explicit setting. -.El -.Pp -The following property can be set only at import time: -.Bl -tag -width Ds -.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off -If set to -.Sy on , -the pool will be imported in read-only mode. -This property can also be referred to by its shortened column name, -.Sy rdonly . 
-.El -.Pp -The following properties can be set at creation time and import time, and later -changed with the -.Nm zpool Cm set -command: -.Bl -tag -width Ds -.It Sy ashift Ns = Ns Sy ashift -Pool sector size exponent, to the power of -.Sy 2 -(internally referred to as -.Sy ashift ) . -Values from 9 to 16, inclusive, are valid; also, the -value 0 (the default) means to auto-detect using the kernel's block -layer and a ZFS internal exception list. -I/O operations will be aligned to the specified size boundaries. -Additionally, the minimum (disk) -write size will be set to the specified size, so this represents a -space vs. performance trade-off. -For optimal performance, the pool sector size should be greater than -or equal to the sector size of the underlying disks. -The typical case for setting this property is when -performance is important and the underlying disks use 4KiB sectors but -report 512B sectors to the OS (for compatibility reasons); in that -case, set -.Sy ashift Ns = Ns Sy 12 -(which is -.Sy 1<<12 No = Sy 4096 ) . -When set, this property is -used as the default hint value in subsequent vdev operations (add, -attach and replace). -Changing this value will not modify any existing -vdev, not even on disk replacement; however it can be used, for -instance, to replace a dying 512B sectors disk with a newer 4KiB -sectors device: this will probably result in bad performance but at the -same time could prevent loss of data. -.It Sy autoexpand Ns = Ns Sy on Ns | Ns Sy off -Controls automatic pool expansion when the underlying LUN is grown. -If set to -.Sy on , -the pool will be resized according to the size of the expanded device. -If the device is part of a mirror or raidz then all devices within that -mirror/raidz group must be expanded before the new space is made available to -the pool. -The default behavior is -.Sy off . -This property can also be referred to by its shortened column name, -.Sy expand . -.It Sy autoreplace Ns = Ns Sy on Ns | Ns Sy off -Controls automatic device replacement. -If set to -.Sy off , -device replacement must be initiated by the administrator by using the -.Nm zpool Cm replace -command. -If set to -.Sy on , -any new device, found in the same physical location as a device that previously -belonged to the pool, is automatically formatted and replaced. -The default behavior is -.Sy off . -This property can also be referred to by its shortened column name, -.Sy replace . -Autoreplace can also be used with virtual disks (like device -mapper) provided that you use the /dev/disk/by-vdev paths setup by -vdev_id.conf. -See the -.Xr vdev_id 8 -manual page for more details. -Autoreplace and autoonline require the ZFS Event Daemon be configured and -running. -See the -.Xr zed 8 -manual page for more details. -.It Sy autotrim Ns = Ns Sy on Ns | Ns Sy off -When set to -.Sy on -space which has been recently freed, and is no longer allocated by the pool, -will be periodically trimmed. -This allows block device vdevs which support -BLKDISCARD, such as SSDs, or file vdevs on which the underlying file system -supports hole-punching, to reclaim unused blocks. -The default value for this property is -.Sy off . -.Pp -Automatic TRIM does not immediately reclaim blocks after a free. -Instead, it will optimistically delay allowing smaller ranges to be aggregated -into a few larger ones. -These can then be issued more efficiently to the storage. -TRIM on L2ARC devices is enabled by setting -.Sy l2arc_trim_ahead > 0 . 
-.Pp -Be aware that automatic trimming of recently freed data blocks can put -significant stress on the underlying storage devices. -This will vary depending of how well the specific device handles these commands. -For lower-end devices it is often possible to achieve most of the benefits -of automatic trimming by running an on-demand (manual) TRIM periodically -using the -.Nm zpool Cm trim -command. -.It Sy bootfs Ns = Ns Sy (unset) Ns | Ns Ar pool Ns Op / Ns Ar dataset -Identifies the default bootable dataset for the root pool. -This property is expected to be set mainly by the installation and upgrade programs. -Not all Linux distribution boot processes use the bootfs property. -.It Sy cachefile Ns = Ns Ar path Ns | Ns Sy none -Controls the location of where the pool configuration is cached. -Discovering all pools on system startup requires a cached copy of the -configuration data that is stored on the root file system. -All pools in this cache are automatically imported when the system boots. -Some environments, such as install and clustering, need to cache this -information in a different location so that pools are not automatically -imported. -Setting this property caches the pool configuration in a different location that -can later be imported with -.Nm zpool Cm import Fl c . -Setting it to the value -.Sy none -creates a temporary pool that is never cached, and the -.Qq -.Pq empty string -uses the default location. -.Pp -Multiple pools can share the same cache file. -Because the kernel destroys and recreates this file when pools are added and -removed, care should be taken when attempting to access this file. -When the last pool using a -.Sy cachefile -is exported or destroyed, the file will be empty. -.It Sy comment Ns = Ns Ar text -A text string consisting of printable ASCII characters that will be stored -such that it is available even if the pool becomes faulted. -An administrator can provide additional information about a pool using this -property. -.It Sy compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns … -Specifies that the pool maintain compatibility with specific feature sets. -When set to -.Sy off -(or unset) compatibility is disabled (all features may be enabled); when set to -.Sy legacy Ns -no features may be enabled. -When set to a comma-separated list of filenames -(each filename may either be an absolute path, or relative to -.Pa /etc/zfs/compatibility.d -or -.Pa /usr/share/zfs/compatibility.d ) -the lists of requested features are read from those files, separated by -whitespace and/or commas. -Only features present in all files may be enabled. -.Pp -See -.Xr zpool-features 5 , -.Xr zpool-create 8 -and -.Xr zpool-upgrade 8 -for more information on the operation of compatibility feature sets. -.It Sy dedupditto Ns = Ns Ar number -This property is deprecated and no longer has any effect. -.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off -Controls whether a non-privileged user is granted access based on the dataset -permissions defined on the dataset. -See -.Xr zfs 8 -for more information on ZFS delegated administration. -.It Sy failmode Ns = Ns Sy wait Ns | Ns Sy continue Ns | Ns Sy panic -Controls the system behavior in the event of catastrophic pool failure. -This condition is typically a result of a loss of connectivity to the underlying -storage device(s) or a failure of all devices within the pool. 
-The behavior of such an event is determined as follows: -.Bl -tag -width "continue" -.It Sy wait -Blocks all I/O access until the device connectivity is recovered and the errors -are cleared. -This is the default behavior. -.It Sy continue -Returns -.Er EIO -to any new write I/O requests but allows reads to any of the remaining healthy -devices. -Any write requests that have yet to be committed to disk would be blocked. -.It Sy panic -Prints out a message to the console and generates a system crash dump. -.El -.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled -The value of this property is the current state of -.Ar feature_name . -The only valid value when setting this property is -.Sy enabled -which moves -.Ar feature_name -to the enabled state. -See -.Xr zpool-features 5 -for details on feature states. -.It Sy listsnapshots Ns = Ns Sy on Ns | Ns Sy off -Controls whether information about snapshots associated with this pool is -output when -.Nm zfs Cm list -is run without the -.Fl t -option. -The default value is -.Sy off . -This property can also be referred to by its shortened name, -.Sy listsnaps . -.It Sy multihost Ns = Ns Sy on Ns | Ns Sy off -Controls whether a pool activity check should be performed during -.Nm zpool Cm import . -When a pool is determined to be active it cannot be imported, even with the -.Fl f -option. -This property is intended to be used in failover configurations -where multiple hosts have access to a pool on shared storage. -.Pp -Multihost provides protection on import only. -It does not protect against an -individual device being used in multiple pools, regardless of the type of vdev. -See the discussion under -.Nm zpool Cm create . -.Pp -When this property is on, periodic writes to storage occur to show the pool is -in use. -See -.Sy zfs_multihost_interval -in the -.Xr zfs-module-parameters 5 -manual page. -In order to enable this property each host must set a unique hostid. -See -.Xr genhostid 1 -.Xr zgenhostid 8 -.Xr spl-module-parameters 5 -for additional details. -The default value is -.Sy off . -.It Sy version Ns = Ns Ar version -The current on-disk version of the pool. -This can be increased, but never decreased. -The preferred method of updating pools is with the -.Nm zpool Cm upgrade -command, though this property can be used when a specific version is needed for -backwards compatibility. -Once feature flags are enabled on a pool this property will no longer have a -value. 
-.El diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index b073d68a46cf..6ce64db4f17b 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -477,7 +477,9 @@ systemctl --system daemon-reload >/dev/null || true %{_bindir}/dbufstat # Man pages %{_mandir}/man1/* +%{_mandir}/man4/* %{_mandir}/man5/* +%{_mandir}/man7/* %{_mandir}/man8/* # Configuration files and scripts %{_libexecdir}/%{name} diff --git a/scripts/zol2zfs-patch.sed b/scripts/zol2zfs-patch.sed index bb6d9faac450..0ca4b6cd6b7e 100755 --- a/scripts/zol2zfs-patch.sed +++ b/scripts/zol2zfs-patch.sed @@ -12,7 +12,7 @@ s:lib/libzfs:usr/src/lib/libzfs/common:g s:lib/libzfs_core:usr/src/lib/libzfs_core/common:g s:lib/libzpool:lib/libzpool/common:g s:lib/libzpool:usr/src/lib/libzpool:g -s:man/man5/zpool-features.5:usr/src/man/man5/zpool-features.5:g +s:man/man7/zpool-features.7:usr/src/man/man5/zpool-features.5:g s:man/man8/zfs.8:usr/src/man/man1m/zfs.1m:g s:module/nvpair:usr/src/common/nvpair:g s:module/zcommon:usr/src/common/zfs/:g diff --git a/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh b/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh index 489360d8c523..5d0198c90c16 100755 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh @@ -84,7 +84,7 @@ log_must zpool import -d $VDIR $TESTPOOL # from ARC, accessed later on as prefetches and transition to MRU as # prefetches. # If accessed again they are counted as MRU and the l2arc_mru_asize arcstat -# will not be 0 (mentioned also in zfs-module-parameters.5) +# will not be 0 (mentioned also in zfs.4) # For the purposes of this test we mitigate this by disabling (predictive) # ZFS prefetches with zfs_prefetch_disable=1. log_must test $(get_arcstat l2_mru_asize) -eq 0 -- cgit v1.2.3 From 1b37cc1abee9a1c58ed54a93df569a81f86072c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 4 Jun 2021 22:08:50 +0200 Subject: Consistentify miscellaneous style on remaining manpages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most notably this fixes the vdev_id(8) non-.Xrs in vdev_id.conf.5 Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12212 --- man/man1/arcstat.1 | 16 +++++++++------- man/man1/cstyle.1 | 21 ++++++++++----------- man/man1/raidz_test.1 | 28 ++++++++++++++-------------- man/man1/zhack.1 | 8 +++----- man/man5/vdev_id.conf.5 | 38 ++++++++++++++++++-------------------- man/man8/fsck.zfs.8 | 8 ++++++-- man/man8/zed.8.in | 6 +++--- man/man8/zpool_influxdb.8 | 4 ++-- 8 files changed, 65 insertions(+), 64 deletions(-) diff --git a/man/man1/arcstat.1 b/man/man1/arcstat.1 index a0240e40e4b8..a69cd8937bdf 100644 --- a/man/man1/arcstat.1 +++ b/man/man1/arcstat.1 @@ -22,7 +22,7 @@ .Sh SYNOPSIS .Nm .Op Fl havxp -.Op Fl f Ar field Ns Op , Ns Ar field Ns ... +.Op Fl f Ar field Ns Op , Ns Ar field Ns … .Op Fl o Ar file .Op Fl s Ar string .Op Ar interval @@ -31,7 +31,7 @@ .Sh DESCRIPTION .Nm prints various ZFS ARC and L2ARC statistics in vmstat-like fashion: -.Bl -tag -width "l2asize" +.Bl -tag -compact -offset Ds -width "l2asize" .It Sy c ARC target size .It Sy dh% @@ -155,7 +155,7 @@ Print all possible stats. .It Fl f Display only specific fields. See -.Sy DESCRIPTION +.Sx DESCRIPTION for supported statistics. .It Fl h Display help message. @@ -166,17 +166,19 @@ Disable auto-scaling of numerical fields (for raw, machine-parsable values). 
.It Fl s Display data with a specified separator (default: 2 spaces). .It Fl x -Print extended stats (same as -.Fl f Ar time,mfu,mru,mfug,mrug,eskip,mtxmis,dread,pread,read Ns No ). +Print extended stats +.Pq same as Fl f Sy time , Ns Sy mfu , Ns Sy mru , Ns Sy mfug , Ns Sy mrug , Ns Sy eskip , Ns Sy mtxmis , Ns Sy dread , Ns Sy pread , Ns Sy read . .It Fl v Show field headers and definitions .El . .Sh OPERANDS The following operands are supported: -.Bl -tag -width "interval" +.Bl -tag -compact -offset Ds -width "interval" .It Ar interval Specify the sampling interval in seconds. .It Ar count -Display only \fIcount\fR reports. +Display only +.Ar count +reports. .El diff --git a/man/man1/cstyle.1 b/man/man1/cstyle.1 index 16f47ba35c1f..f5f9ec78f827 100644 --- a/man/man1/cstyle.1 +++ b/man/man1/cstyle.1 @@ -31,7 +31,7 @@ .Nm .Op Fl chpvCP .Op Fl o Ar construct Ns Op , Ns Ar construct Ns … -.Op Ar file… +.Oo Ar file Oc Ns … .Sh DESCRIPTION .Nm inspects C source files (*.c and *.h) for common stylistic errors. @@ -99,7 +99,7 @@ Allow doxygen-style block comments .Pq Sy /** No and Sy /*!\& . .It Sy splint Allow splint-style lint comments -.Pq Sy /*@...@*/ . +.Pq Sy /*@ Ns ... Ns Sy @*/ . .El .El . @@ -118,29 +118,28 @@ statement has balanced parenthesis. .It Some .Xr cpp 1 -macros do not require \fB;\fPs after them. +macros do not require +.Sy ;\& Ns s after them. Any such macros .Em must be ALL_CAPS; any lower case letters will cause bad output. .Pp -The bad output will generally be corrected after the next \fB;\fP, -.Sy { , -or -.Sy } . +The bad output will generally be corrected after the next +.Sy ;\& , { , No or Sy } . .El Some continuation error messages deserve some additional explanation: .Bl -tag -width Ds .It Sy multiple statements continued over multiple lines A multi-line statement which is not broken at statement boundaries. For example: -.Bd -literal +.Bd -literal -compact -offset Ds if (this_is_a_long_variable == another_variable) a = b + c; .Ed .Pp Will trigger this error. Instead, do: -.Bd -literal +.Bd -literal -compact -offset Ds if (this_is_a_long_variable == another_variable) a = b + c; .Ed @@ -148,13 +147,13 @@ if (this_is_a_long_variable == another_variable) For visibility, empty bodies for if, for, and while statements should be on their own line. For example: -.Bd -literal +.Bd -literal -compact -offset Ds while (do_something(&x) == 0); .Ed .Pp Will trigger this error. Instead, do: -.Bd -literal +.Bd -literal -compact -offset Ds while (do_something(&x) == 0) ; .Ed diff --git a/man/man1/raidz_test.1 b/man/man1/raidz_test.1 index 1c61c7a8772b..4283a4b527f3 100644 --- a/man/man1/raidz_test.1 +++ b/man/man1/raidz_test.1 @@ -50,44 +50,44 @@ option. .Bl -tag -width "-B(enchmark)" .It Fl h Print a help summary. -.It Fl a Ar ashift No (default: Sy 9 ) +.It Fl a Ar ashift Pq default: Sy 9 Ashift value. -.It Fl o Ar zio_off_shift No (default: Sy 0 ) +.It Fl o Ar zio_off_shift Pq default: Sy 0 ZIO offset for each raidz block. The offset's value is -.Sy 1 << zio_off_shift . -.It Fl d Ar raidz_data_disks No (default: Sy 8 ) +.Em 2^zio_off_shift . +.It Fl d Ar raidz_data_disks Pq default: Sy 8 Number of raidz data disks to use. Additional disks will be used for parity. -.It Fl s Ar zio_size_shift No (default: Sy 19 ) +.It Fl s Ar zio_size_shift Pq default: Sy 19 Size of data for raidz block. The real size is -.Sy 1 << zio_size_shift . -.It Fl r Ar reflow_offset No (default: Sy uint max ) +.Em 2^zio_size_shift . 
+.It Fl r Ar reflow_offset Pq default: Sy uint max Set raidz expansion offset. The expanded raidz map allocation function will produce different map configurations depending on this value. -.It Fl S Ns No (weep) +.It Fl S Ns Pq weep Sweep parameter space while verifying the raidz implementations. This option will exhaust all most of valid values for the .Fl aods options. Runtime using this option will be long. -.It Fl t Ns No (imeout) +.It Fl t Ns Pq imeout Wall time for sweep test in seconds. The actual runtime could be longer. -.It Fl B Ns No (enchmark) +.It Fl B Ns Pq enchmark All implementations are benchmarked using increasing per disk data size. Results are given as throughput per disk, measured in MiB/s. -.It Fl e Ns No (xpansion) +.It Fl e Ns Pq xpansion Use expanded raidz map allocation function. -.It Fl v Ns No (erbose) +.It Fl v Ns Pq erbose Increase verbosity. -.It Fl T Ns No (est the test) +.It Fl T Ns Pq est the test Debugging option: fail all tests. This is to check if tests would properly verify bit-exactness. -.It Fl D Ns No (ebug) +.It Fl D Ns Pq ebug Debugging option: attach .Xr gdb 1 when diff --git a/man/man1/zhack.1 b/man/man1/zhack.1 index b18b3a4c0996..83046ee8f515 100644 --- a/man/man1/zhack.1 +++ b/man/man1/zhack.1 @@ -119,7 +119,7 @@ Can be specified more than once. . .Sh EXAMPLES .Bd -literal -# zhack feature stat tank +.No # Nm zhack Cm feature stat Ar tank for_read_obj: org.illumos:lz4_compress = 0 for_write_obj: @@ -130,10 +130,8 @@ descriptions_obj: com.delphix:empty_bpobj = Snapshots use less space. org.illumos:lz4_compress = LZ4 compression algorithm support. -# zhack feature enable -d 'Predict future disk failures.' \\ - tank com.example:clairvoyance - -# zhack feature ref tank com.example:clairvoyance +.No # Nm zhack Cm feature enable Fl d No 'Predict future disk failures.' Ar tank com.example:clairvoyance +.No # Nm zhack Cm feature ref Ar tank com.example:clairvoyance .Ed . .Sh SEE ALSO diff --git a/man/man5/vdev_id.conf.5 b/man/man5/vdev_id.conf.5 index 1268114bdbc6..a2d38add4ee0 100644 --- a/man/man5/vdev_id.conf.5 +++ b/man/man5/vdev_id.conf.5 @@ -14,13 +14,13 @@ . .Sh NAME .Nm vdev_id.conf -.Nd Configuration file for vdev_id +.Nd configuration file for vdev_id(8) .Sh DESCRIPTION .Nm is the configuration file for -.Nm vdev_id Ns Sy (8) . +.Xr vdev_id 8 . It controls the default behavior of -.Nm vdev_id Ns Sy (8) +.Xr vdev_id 8 while it is mapping a disk device name to an alias. .Pp The @@ -37,7 +37,7 @@ Maps a device link in the .Pa /dev directory hierarchy to a new device name. The udev rule defining the device link must have run prior to -.Nm vdev_id Ns Sy (8) . +.Xr vdev_id 8 . A defined alias takes precedence over a topology-derived name, but the two naming methods can otherwise coexist. For example, one might name drives in a JBOD with the @@ -94,12 +94,12 @@ and a default mapping applied to the others. . .It Sy multipath Sy yes Ns | Ns Sy no Specifies whether -.Nm vdev_id Ns Sy (8) +.Xr vdev_id 8 will handle only dm-multipath devices. If set to .Sy yes then -.Nm vdev_id Ns Sy (8) +.Xr vdev_id 8 will examine the first running component disk of a dm-multipath device as provided by the driver command to determine the physical path. . @@ -116,7 +116,7 @@ channels are uniquely identified by a SAS switch port number .It Sy phys_per_port Ar num Specifies the number of PHY devices associated with a SAS HBA port or SAS switch port. 
-.Nm vdev_id Ns Sy (8) +.Xr vdev_id 8 internally uses this value to determine which HBA or switch port a device is connected to. The default is @@ -157,13 +157,13 @@ may be unstable across disk replacement. .Bl -tag -width "-v v" .It Pa /etc/zfs/vdev_id.conf The configuration file for -.Nm vdev_id Ns Sy (8) . +.Xr vdev_id 8 . .El . .Sh EXAMPLES A non-multipath configuration with direct-attached SAS enclosures and an arbitrary slot re-mapping: -.Bd -literal -offset Ds +.Bd -literal -compact -offset Ds multipath no topology sas_direct phys_per_port 4 @@ -195,8 +195,8 @@ slot 4 3 A SAS-switch topology. Note, that the .Ar channel -keyword takes only two arguments in this example. -.Bd -literal -offset Ds +keyword takes only two arguments in this example: +.Bd -literal -compact -offset Ds topology sas_switch # SWITCH PORT CHANNEL NAME @@ -207,9 +207,8 @@ channel 4 D .Ed .Pp A multipath configuration. -Note that channel names have multiple -definitions - one per physical path. -.Bd -literal -offset Ds +Note that channel names have multiple definitions - one per physical path: +.Bd -literal -compact -offset Ds multipath yes # PCI_SLOT HBA PORT CHANNEL NAME @@ -219,8 +218,8 @@ channel 86:00.0 1 A channel 86:00.0 0 B .Ed .Pp -A configuration with enclosure_symlinks enabled. -.Bd -literal -offset Ds +A configuration with enclosure_symlinks enabled: +.Bd -literal -compact -offset Ds multipath yes enclosure_symlinks yes @@ -230,17 +229,16 @@ channel 05:00.0 0 L channel 06:00.0 1 U channel 06:00.0 0 L .Ed -.Pp In addition to the disks symlinks, this configuration will create: -.Bd -literal -offset Ds +.Bd -literal -compact -offset Ds /dev/by-enclosure/enc-L0 /dev/by-enclosure/enc-L1 /dev/by-enclosure/enc-U0 /dev/by-enclosure/enc-U1 .Ed .Pp -A configuration using device link aliases. -.Bd -literal -offset Ds +A configuration using device link aliases: +.Bd -literal -compact -offset Ds # by-vdev # name fully qualified or base name of device link alias d1 /dev/disk/by-id/wwn-0x5000c5002de3b9ca diff --git a/man/man8/fsck.zfs.8 b/man/man8/fsck.zfs.8 index b88dd847b1bb..0ce7576ebe63 100644 --- a/man/man8/fsck.zfs.8 +++ b/man/man8/fsck.zfs.8 @@ -58,7 +58,9 @@ interface still allows it to communicate some errors: if the .Ar dataset is in a degraded pool, then .Nm -will return exit code 4 to indicate an uncorrected filesystem error. +will return exit code +.Sy 4 +to indicate an uncorrected filesystem error. .Pp Similarly, if the .Ar dataset @@ -66,7 +68,9 @@ is in a faulted pool and has a legacy .Pa /etc/fstab record, then .Nm -will return exit code 8 to indicate a fatal operational error. +will return exit code +.Sy 8 +to indicate a fatal operational error. .Sh SEE ALSO .Xr fstab 5 , .Xr fsck 8 , diff --git a/man/man8/zed.8.in b/man/man8/zed.8.in index b0b26bfcf8e2..d3297605206e 100644 --- a/man/man8/zed.8.in +++ b/man/man8/zed.8.in @@ -147,7 +147,7 @@ is sourced by scripts; these variables should be prefixed with The zevent nvpairs are passed to ZEDLETs as environment variables. Each nvpair name is converted to an environment variable in the following manner: -.Bl -enum +.Bl -enum -compact .It it is prefixed with .Sy ZEVENT_ , @@ -160,7 +160,7 @@ each non-alphanumeric character is converted to an underscore. Some additional environment variables have been defined to present certain nvpair values in a more convenient form. An incomplete list of zevent environment variables is as follows: -.Bl -tag -width "ZEVENT_TIME_STRING" +.Bl -tag -compact -width "ZEVENT_TIME_STRING" .It Sy ZEVENT_EID The Event IDentifier. 
.It Sy ZEVENT_CLASS @@ -187,7 +187,7 @@ An almost-RFC3339-compliant string for .El .Pp Additionally, the following ZED & ZFS variables are defined: -.Bl -tag -width "ZEVENT_TIME_STRING" +.Bl -tag -compact -width "ZEVENT_TIME_STRING" .It Sy ZED_PID The daemon's process ID. .It Sy ZED_ZEDLET_DIR diff --git a/man/man8/zpool_influxdb.8 b/man/man8/zpool_influxdb.8 index a4e417078fd3..021fbdeaac8a 100644 --- a/man/man8/zpool_influxdb.8 +++ b/man/man8/zpool_influxdb.8 @@ -32,7 +32,7 @@ .Op Fl e Ns | Ns Fl -execd .Op Fl n Ns | Ns Fl -no-histogram .Op Fl s Ns | Ns Fl -sum-histogram-buckets -.Op Fl t Ns | Ns Fl -tags Ar key Ns = Ns Ar value Ns Op , Ns Ar key Ns = Ns Ar value Ns … +.Op Fl t Ns | Ns Fl -tags Ar key Ns = Ns Ar value Ns Oo , Ns Ar key Ns = Ns Ar value Oc Ns … .Op Ar pool . .Sh DESCRIPTION @@ -80,7 +80,7 @@ By default, the values are not accumulated and the raw data appears as shown by .Nm zpool Cm iostat . This works well for Grafana's heatmap plugin. Summing the buckets produces output similar to Prometheus histograms. -.It Fl t , Fl -tags Ar key Ns = Ns Ar value Ns Op , Ns Ar key Ns = Ns Ar value Ns … +.It Fl t , Fl -tags Ar key Ns = Ns Ar value Ns Oo , Ns Ar key Ns = Ns Ar value Oc Ns … Adds specified tags to the tag set. No sanity checking is performed. See the InfluxDB Line Protocol format documentation for details on escaping -- cgit v1.2.3 From 4b7ed6a286c3c92e34f2941dafd822cfa6a880dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 4 Jun 2021 22:43:21 +0200 Subject: zgenhostid.8: revisit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12212 --- man/man8/zgenhostid.8 | 60 ++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/man/man8/zgenhostid.8 b/man/man8/zgenhostid.8 index 3eff55b6d861..0dcebef73c31 100644 --- a/man/man8/zgenhostid.8 +++ b/man/man8/zgenhostid.8 @@ -18,72 +18,78 @@ .\" .\" CDDL HEADER END .\" -.\" .\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC. .\" .Dd May 26, 2021 -.Dt ZGENHOSTID 8 SMM +.Dt ZGENHOSTID 8 .Os +. .Sh NAME .Nm zgenhostid -.Nd generate and store a hostid in -.Em /etc/hostid +.Nd generate host ID into /etc/hostid .Sh SYNOPSIS .Nm .Op Fl f .Op Fl o Ar filename .Op Ar hostid +. .Sh DESCRIPTION Creates .Pa /etc/hostid -file and stores hostid in it. -If the user provides -.Op Ar hostid -on the command line, validates and stores that value. -Otherwise, randomly generates a value to store. -.Bl -tag -width "hostid" +file and stores the host ID in it. +If +.Ar hostid +was provided, validate and store that value. +Otherwise, randomly generate an ID. +. +.Sh OPTIONS +.Bl -tag -width "-o filename" .It Fl h Display a summary of the command-line options. .It Fl f -Force file overwrite. +Allow output overwrite. .It Fl o Ar filename Write to .Pa filename -instead of default -.Pa /etc/hostid +instead of the default +.Pa /etc/hostid . .It Ar hostid Specifies the value to be placed in .Pa /etc/hostid . It should be a number with a value between 1 and 2^32-1. -If it is 0, zgenhostid will generate a random hostid. +If +.Sy 0 , +generate a random ID. This value -.Sy must +.Em must be unique among your systems. It -.Sy must -be expressed in hexadecimal and be exactly -.Em 8 -digits long, optionally prefixed by -.Em 0x . +.Em must +be an 8-digit-long hexadecimal number, optionally prefixed by +.Qq 0x . .El +. 
.Sh FILES .Pa /etc/hostid . .Sh EXAMPLES .Bl -tag -width Bd .It Generate a random hostid and store it .Dl # Nm .It Record the libc-generated hostid in Pa /etc/hostid -.Dl # zgenhostid "$(hostid)" -.It Record a custom hostid (0xdeadbeef) in Pa /etc/hostid -.Dl # zgenhostid deadbeef -.It Record a custom hostid (0x01234567) in Pa /tmp/hostid No and ovewrite the file if it exists -.Dl # zgenhostid -f -o /tmp/hostid 0x01234567 +.Dl # Nm Qq $ Ns Pq Nm hostid +.It Record a custom hostid Po Ar 0xdeadbeef Pc in Pa /etc/hostid +.Dl # Nm Ar deadbeef +.It Record a custom hostid Po Ar 0x01234567 Pc in Pa /tmp/hostid No and overwrite the file if it exists +.Dl # Nm Fl f o Ar /tmp/hostid 0x01234567 .El + . .Sh SEE ALSO .Xr genhostid 1 , .Xr hostid 1 , .Xr sethostid 3 , .Xr spl 4 + . .Sh HISTORY .Nm emulates the @@ -91,4 +97,4 @@ emulates the utility and is provided for use on systems which do not include the utility or do not provide the .Xr sethostid 3 -call. +function. -- cgit v1.2.3 From d406a695c640dd93f49f8dcb532bfa0394ad1812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 3 Jun 2021 23:34:27 +0200 Subject: libefi: remove efi_auto_sense() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's present (but undocumented) in the illumos gate and used exclusively by rmformat(1) (which I recommend as a nice blast from the past), and also the math assumes 512B sectors and is therefore wrong Reviewed-by: Matthew Ahrens Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12191 --- include/sys/efi_partition.h | 1 - lib/libefi/rdwr_efi.c | 54 - lib/libzfs_core/libzfs_core.abi | 3481 ++++++++++++++++++++------------------- 3 files changed, 1744 insertions(+), 1792 deletions(-) diff --git a/include/sys/efi_partition.h b/include/sys/efi_partition.h index 88bdfd2b1ca3..fbb58013e22a 100644 --- a/include/sys/efi_partition.h +++ b/include/sys/efi_partition.h @@ -370,7 +370,6 @@ extern int efi_rescan(int); extern void efi_free(struct dk_gpt *); extern int efi_type(int); extern void efi_err_check(struct dk_gpt *); -extern int efi_auto_sense(int fd, struct dk_gpt **); extern int efi_use_whole_disk(int fd); #endif diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 1e022cc8eeaa..39b3509a5c4b 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -1692,57 +1692,3 @@ efi_err_check(struct dk_gpt *vtoc) "no reserved partition found\n"); } } - -/* - * We need to get information necessary to construct a *new* efi - * label type - */ -int -efi_auto_sense(int fd, struct dk_gpt **vtoc) -{ - - int i; - - /* - * Now build the default partition table - */ - if (efi_alloc_and_init(fd, EFI_NUMPAR, vtoc) != 0) { - if (efi_debug) { - (void) fprintf(stderr, "efi_alloc_and_init failed.\n"); - } - return (-1); - } - - for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) { - (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag; - (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag; - (*vtoc)->efi_parts[i].p_start = 0; - (*vtoc)->efi_parts[i].p_size = 0; - } - /* - * Make constants first - * and variable partitions later - */ - - /* root partition - s0 128 MB */ - (*vtoc)->efi_parts[0].p_start = 34; - (*vtoc)->efi_parts[0].p_size = 262144; - - /* partition - s1 128 MB */ - (*vtoc)->efi_parts[1].p_start = 262178; - (*vtoc)->efi_parts[1].p_size = 262144; - - /* partition -s2 is NOT the Backup disk */ - (*vtoc)->efi_parts[2].p_tag = V_UNASSIGNED; - - /* partition -s6 /usr
partition - HOG */ - (*vtoc)->efi_parts[6].p_start = 524322; - (*vtoc)->efi_parts[6].p_size = (*vtoc)->efi_last_u_lba - 524322 - - (1024 * 16); - - /* efi reserved partition - s9 16K */ - (*vtoc)->efi_parts[8].p_start = (*vtoc)->efi_last_u_lba - (1024 * 16); - (*vtoc)->efi_parts[8].p_size = (1024 * 16); - (*vtoc)->efi_parts[8].p_tag = V_RESERVED; - return (0); -} diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 574928cba089..79567d1504db 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi
[3481-line diff of the regenerated libzfs_core.abi (libabigail XML ABI representation) not reproducible here: the XML element markup was stripped in extraction, leaving only bare +/- hunk markers]
+ + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - + - + - + - + - - + + - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -4088,378 +4100,378 @@ - + - - + + - + - + - + - + - + - - - - + + + + - + - + - + - + - + - + - + - + - + - - - + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - + + + + + + + - - - - + + + + - - + + - + - + - + - + - + - + - + - - - - + + + + - - + + - - - - + + + + - - - + + + - - - - + + + + - - - + + + - - - + + + - - + + - - + + - - + + - - + + - - - + + + - - - + + + - - - + + + - + - - - + + + - - + + - - + + - - + + - - - - - + + + + + - - + + - + - + - - + + - - - - - + + + + + - - - - - + + + + + - - - + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -4467,19 +4479,19 @@ - - - + + + - - + + - + - + @@ -4487,76 +4499,71 @@ - - - - - - + + + + + + - + - - - + + + - - + + - - + + - - - + + + - + - + - + - + - + - + - - + + - - - - - -- cgit v1.2.3 From 327c904615db9cfcd8c9e07cb56b8e36050a4048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 3 Jun 2021 23:50:07 +0200 Subject: lib{efi,avl,share,tpool,zfs_core,zfsbootenv,zutil}: -fvisibility=hidden MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No symbols affected in libavl No symbols affected by libtpool, but pre-ANSI declarations got purged No symbols affected by libzfs_core No symbols affected by libzfs_bootenv libefi got cleaned, gained efi_debug documentation in efi_partition.h, and removes one undocumented and unused symbol from libzfs_core: D default_vtoc_map libnvpair saw removal of these symbols: D nv_alloc_nosleep_def D nv_alloc_sleep D nv_alloc_sleep_def D nv_fixed_ops_def D nvlist_hashtable_init_size D nvpair_max_recursion libshare saw removal of these symbols from libzfs: T libshare_nfs_init T libshare_smb_init T register_fstype B smb_shares libzutil saw removal of these internal symbols from libzfs_core: T label_paths T slice_cache_compare T zpool_find_import_blkid T zpool_open_func T zutil_alloc T zutil_strdup Reviewed-by: Matthew Ahrens Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #12191 --- include/libnvpair.h | 37 +- include/libzfs_core.h | 130 ++- include/libzfsbootenv.h | 20 +- include/sys/avl.h | 36 +- include/sys/avl_impl.h | 5 +- include/sys/efi_partition.h | 19 +- include/sys/nvpair.h | 443 +++---- include/thread_pool.h | 36 +- lib/libavl/Makefile.am | 1 + lib/libefi/Makefile.am | 1 + lib/libefi/rdwr_efi.c | 34 - lib/libnvpair/Makefile.am | 1 + lib/libnvpair/libnvpair.abi | 2468 +++++++++++++++++++-------------------- lib/libshare/Makefile.am | 2 + lib/libshare/nfs.c | 2 +- lib/libshare/os/freebsd/nfs.c | 2 +- lib/libshare/os/linux/nfs.c | 2 +- lib/libspl/include/libshare.h | 14 +- lib/libtpool/Makefile.am | 2 + lib/libzfs/libzfs.abi | 183 ++- lib/libzfs_core/Makefile.am | 2 + lib/libzfs_core/libzfs_core.abi | 1605 ++++++++++++------------- lib/libzfsbootenv/Makefile.am | 2 + lib/libzutil/Makefile.am | 1 + 24 files changed, 2481 insertions(+), 2567 deletions(-) diff --git a/include/libnvpair.h b/include/libnvpair.h index 5277f9574ddf..bc50c3b7e1f8 100644 --- a/include/libnvpair.h +++ b/include/libnvpair.h @@ -24,7 +24,7 @@ */ #ifndef _LIBNVPAIR_H -#define _LIBNVPAIR_H +#define _LIBNVPAIR_H extern 
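The export mechanism this commit relies on is worth spelling out: each
header's include guard is redefined to expand to the default-visibility
attribute, and every public declaration is prefixed with the guard macro;
everything else, compiled under -fvisibility=hidden, stays out of the
dynamic symbol table. A minimal sketch of the pattern, with file and
symbol names that are hypothetical and not taken from this series:

    /* mylib.h: the include guard doubles as the export annotation */
    #ifndef _MYLIB_H
    #define _MYLIB_H extern __attribute__((visibility("default")))

    _MYLIB_H int mylib_frob(int);   /* exported from the shared object */

    #endif

    /* mylib.c: built with -fvisibility=hidden */
    #include "mylib.h"

    static int
    helper(int x)                   /* static and hidden: never exported */
    {
        return (x + 1);
    }

    int
    mylib_frob(int x)               /* exported: annotated in the header */
    {
        return (helper(x));
    }

Because the guard macro expands to a plain storage-class-plus-attribute
prefix, the same header compiles unchanged for consumers of the library.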
diff --git a/include/libnvpair.h b/include/libnvpair.h
index 5277f9574ddf..bc50c3b7e1f8 100644
--- a/include/libnvpair.h
+++ b/include/libnvpair.h
@@ -24,7 +24,7 @@
  */
 
 #ifndef	_LIBNVPAIR_H
-#define	_LIBNVPAIR_H
+#define	_LIBNVPAIR_H extern __attribute__((visibility("default")))
 
 #include <sys/nvpair.h>
 #include <stdlib.h>
@@ -42,13 +42,13 @@ extern "C" {
  * are all imported from <sys/nvpair.h> included above.
  */
 
-extern int nvpair_value_match(nvpair_t *, int, char *, char **);
-extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *,
-    char **);
-extern void nvlist_print(FILE *, nvlist_t *);
-int nvlist_print_json(FILE *, nvlist_t *);
-extern void dump_nvlist(nvlist_t *, int);
+_LIBNVPAIR_H int nvpair_value_match(nvpair_t *, int, char *, char **);
+_LIBNVPAIR_H int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *,
+    char **);
+_LIBNVPAIR_H void nvlist_print(FILE *, nvlist_t *);
+_LIBNVPAIR_H int nvlist_print_json(FILE *, nvlist_t *);
+_LIBNVPAIR_H void dump_nvlist(nvlist_t *, int);
 
 /*
  * Private nvlist printing interface that allows the caller some control
@@ -88,18 +88,18 @@ enum nvlist_indent_mode {
 	NVLIST_INDENT_TABBED	/* Indent with tabstops */
 };
 
-extern nvlist_prtctl_t nvlist_prtctl_alloc(void);
-extern void nvlist_prtctl_free(nvlist_prtctl_t);
-extern void nvlist_prt(nvlist_t *, nvlist_prtctl_t);
+_LIBNVPAIR_H nvlist_prtctl_t nvlist_prtctl_alloc(void);
+_LIBNVPAIR_H void nvlist_prtctl_free(nvlist_prtctl_t);
+_LIBNVPAIR_H void nvlist_prt(nvlist_t *, nvlist_prtctl_t);
 
 /* Output stream */
-extern void nvlist_prtctl_setdest(nvlist_prtctl_t, FILE *);
-extern FILE *nvlist_prtctl_getdest(nvlist_prtctl_t);
+_LIBNVPAIR_H void nvlist_prtctl_setdest(nvlist_prtctl_t, FILE *);
+_LIBNVPAIR_H FILE *nvlist_prtctl_getdest(nvlist_prtctl_t);
 
 /* Indentation mode, start indent, indent increment; default tabbed/0/1 */
-extern void nvlist_prtctl_setindent(nvlist_prtctl_t, enum nvlist_indent_mode,
-    int, int);
-extern void nvlist_prtctl_doindent(nvlist_prtctl_t, int);
+_LIBNVPAIR_H void nvlist_prtctl_setindent(nvlist_prtctl_t,
+    enum nvlist_indent_mode, int, int);
+_LIBNVPAIR_H void nvlist_prtctl_doindent(nvlist_prtctl_t, int);
 
 enum nvlist_prtctl_fmt {
 	NVLIST_FMT_MEMBER_NAME,		/* name fmt; default "%s = " */
@@ -107,9 +107,10 @@ enum nvlist_prtctl_fmt {
 	NVLIST_FMT_BTWN_ARRAY		/* between array members; default " " */
 };
 
-extern void nvlist_prtctl_setfmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt,
-    const char *);
-extern void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ...);
+_LIBNVPAIR_H void nvlist_prtctl_setfmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt,
+    const char *);
+_LIBNVPAIR_H void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt,
+    ...);
 
 /*
  * Function prototypes for interfaces that appoint a new rendering function
@@ -139,7 +140,7 @@ extern void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ...);
  */
 
 #define	NVLIST_PRINTCTL_SVDECL(funcname, valtype) \
-	extern void funcname(nvlist_prtctl_t, \
+	_LIBNVPAIR_H void funcname(nvlist_prtctl_t, \
 	int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, valtype), \
 	void *)
 
@@ -170,7 +171,7 @@ NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_nvlist, nvlist_t *);
  * Return values as above.
  */
 
 #define	NVLIST_PRINTCTL_AVDECL(funcname, vtype) \
-	extern void funcname(nvlist_prtctl_t, \
+	_LIBNVPAIR_H void funcname(nvlist_prtctl_t, \
 	int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, uint_t), \
 	void *)
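To see the annotated interface above from the consumer side, here is a
minimal user of the still-exported printing entry points; the key and
value strings are illustrative, and error handling is kept to the bare
minimum:

    #include <stdio.h>
    #include <libnvpair.h>

    int
    main(void)
    {
        nvlist_t *nvl;

        if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
            return (1);
        if (nvlist_add_string(nvl, "pool", "tank") != 0) {
            nvlist_free(nvl);
            return (1);
        }
        nvlist_print(stdout, nvl);      /* "pool = tank"-style dump */
        nvlist_free(nvl);
        return (0);
    }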
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index 34161a06fb45..83d8211ab615 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -27,7 +27,7 @@
  */
 
 #ifndef	_LIBZFS_CORE_H
-#define	_LIBZFS_CORE_H
+#define	_LIBZFS_CORE_H extern __attribute__((visibility("default")))
 
 #include
 #include
@@ -38,41 +38,42 @@ extern "C" {
 #endif
 
-int libzfs_core_init(void);
-void libzfs_core_fini(void);
+_LIBZFS_CORE_H int libzfs_core_init(void);
+_LIBZFS_CORE_H void libzfs_core_fini(void);
 
 /*
- * NB: this type should be kept binary compatible with dmu_objset_type_t.
+ * NB: this type should be kept binary-compatible with dmu_objset_type_t.
  */
 enum lzc_dataset_type {
 	LZC_DATSET_TYPE_ZFS = 2,
 	LZC_DATSET_TYPE_ZVOL
 };
 
-int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
-int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
+_LIBZFS_CORE_H int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *,
+    uint8_t *, uint_t);
+_LIBZFS_CORE_H int lzc_clone(const char *, const char *, nvlist_t *);
+_LIBZFS_CORE_H int lzc_promote(const char *, char *, int);
+_LIBZFS_CORE_H int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
+_LIBZFS_CORE_H int lzc_bookmark(nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_get_bookmark_props(const char *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
+_LIBZFS_CORE_H int lzc_unload_key(const char *);
+_LIBZFS_CORE_H int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *,
     uint_t);
-int lzc_clone(const char *, const char *, nvlist_t *);
-int lzc_promote(const char *, char *, int);
-int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
-int lzc_bookmark(nvlist_t *, nvlist_t **);
-int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
-int lzc_get_bookmark_props(const char *, nvlist_t **);
-int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
-int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
-int lzc_unload_key(const char *);
-int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);
-int lzc_initialize(const char *, pool_initialize_func_t, nvlist_t *,
-    nvlist_t **);
-int lzc_trim(const char *, pool_trim_func_t, uint64_t, boolean_t,
+_LIBZFS_CORE_H int lzc_initialize(const char *, pool_initialize_func_t,
     nvlist_t *, nvlist_t **);
-int lzc_redact(const char *, const char *, nvlist_t *);
+_LIBZFS_CORE_H int lzc_trim(const char *, pool_trim_func_t, uint64_t, boolean_t,
+    nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_redact(const char *, const char *, nvlist_t *);
 
-int lzc_snaprange_space(const char *, const char *, uint64_t *);
+_LIBZFS_CORE_H int lzc_snaprange_space(const char *, const char *, uint64_t *);
 
-int lzc_hold(nvlist_t *, int, nvlist_t **);
-int lzc_release(nvlist_t *, nvlist_t **);
-int lzc_get_holds(const char *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_hold(nvlist_t *, int, nvlist_t **);
+_LIBZFS_CORE_H int lzc_release(nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_get_holds(const char *, nvlist_t **);
 
 enum lzc_send_flags {
 	LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
@@ -82,61 +83,66 @@ enum lzc_send_flags {
 	LZC_SEND_FLAG_SAVED = 1 << 4,
 };
 
-int lzc_send(const char *, const char *, int, enum lzc_send_flags);
-int lzc_send_resume(const char *, const char *, int,
+_LIBZFS_CORE_H int lzc_send(const char *, const char *, int,
+    enum lzc_send_flags);
+_LIBZFS_CORE_H int lzc_send_resume(const char *, const char *, int,
     enum lzc_send_flags, uint64_t, uint64_t);
-int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
+_LIBZFS_CORE_H int lzc_send_space(const char *, const char *,
+    enum lzc_send_flags, uint64_t *);
 
 struct dmu_replay_record;
 
-int lzc_send_redacted(const char *, const char *, int, enum lzc_send_flags,
-    const char *);
-int lzc_send_resume_redacted(const char *, const char *, int,
+_LIBZFS_CORE_H int lzc_send_redacted(const char *, const char *, int,
+    enum lzc_send_flags, const char *);
+_LIBZFS_CORE_H int lzc_send_resume_redacted(const char *, const char *, int,
     enum lzc_send_flags, uint64_t, uint64_t, const char *);
-int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
-    int);
-int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
-    boolean_t, int);
-int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
-    boolean_t, boolean_t, int, const struct dmu_replay_record *);
-int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
-    boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
+_LIBZFS_CORE_H int lzc_receive(const char *, nvlist_t *, const char *,
+    boolean_t, boolean_t, int);
+_LIBZFS_CORE_H int lzc_receive_resumable(const char *, nvlist_t *, const char *,
+    boolean_t, boolean_t, int);
+_LIBZFS_CORE_H int lzc_receive_with_header(const char *, nvlist_t *,
+    const char *, boolean_t, boolean_t, boolean_t, int,
+    const struct dmu_replay_record *);
+_LIBZFS_CORE_H int lzc_receive_one(const char *, nvlist_t *, const char *,
+    boolean_t, boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
     uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
-int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
-    uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, int,
-    const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
-    uint64_t *, nvlist_t **);
-int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
-int lzc_send_space_resume_redacted(const char *, const char *,
+_LIBZFS_CORE_H int lzc_receive_with_cmdprops(const char *, nvlist_t *,
+    nvlist_t *, uint8_t *, uint_t, const char *, boolean_t, boolean_t,
+    boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
+    uint64_t *, uint64_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_send_space(const char *, const char *,
+    enum lzc_send_flags, uint64_t *);
+_LIBZFS_CORE_H int lzc_send_space_resume_redacted(const char *, const char *,
     enum lzc_send_flags, uint64_t, uint64_t, uint64_t, const char *, int,
     uint64_t *);
-uint64_t lzc_send_progress(int);
+_LIBZFS_CORE_H uint64_t lzc_send_progress(int);
 
-boolean_t lzc_exists(const char *);
+_LIBZFS_CORE_H boolean_t lzc_exists(const char *);
 
-int lzc_rollback(const char *, char *, int);
-int lzc_rollback_to(const char *, const char *);
+_LIBZFS_CORE_H int lzc_rollback(const char *, char *, int);
+_LIBZFS_CORE_H int lzc_rollback_to(const char *, const char *);
 
-int lzc_rename(const char *, const char *);
-int lzc_destroy(const char *);
+_LIBZFS_CORE_H int lzc_rename(const char *, const char *);
+_LIBZFS_CORE_H int lzc_destroy(const char *);
 
-int lzc_channel_program(const char *, const char *, uint64_t,
-    uint64_t, nvlist_t *, nvlist_t **);
-int lzc_channel_program_nosync(const char *, const char *, uint64_t,
+_LIBZFS_CORE_H int lzc_channel_program(const char *, const char *, uint64_t,
     uint64_t, nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_channel_program_nosync(const char *, const char *,
+    uint64_t, uint64_t, nvlist_t *, nvlist_t **);
 
-int lzc_sync(const char *, nvlist_t *, nvlist_t **);
-int lzc_reopen(const char *, boolean_t);
+_LIBZFS_CORE_H int lzc_sync(const char *, nvlist_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_reopen(const char *, boolean_t);
 
-int lzc_pool_checkpoint(const char *);
-int lzc_pool_checkpoint_discard(const char *);
+_LIBZFS_CORE_H int lzc_pool_checkpoint(const char *);
+_LIBZFS_CORE_H int lzc_pool_checkpoint_discard(const char *);
 
-int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
-int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
-int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
+_LIBZFS_CORE_H int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
+_LIBZFS_CORE_H int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t,
+    boolean_t *);
+_LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
 
-int lzc_set_bootenv(const char *, const nvlist_t *);
-int lzc_get_bootenv(const char *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *);
+_LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);
 
 #ifdef __cplusplus
 }
 #endif
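A rough sketch of a libzfs_core consumer, to illustrate that only the
export annotations changed above and not the signatures; the dataset
name is made up:

    #include <stdio.h>
    #include <libzfs_core.h>

    int
    main(void)
    {
        int err = libzfs_core_init();   /* opens the ZFS control device */

        if (err != 0)
            return (err);
        /* lzc_exists() takes a dataset name and returns boolean_t */
        printf("exists: %d\n", (int)lzc_exists("rpool/example"));
        libzfs_core_fini();
        return (0);
    }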
diff --git a/include/libzfsbootenv.h b/include/libzfsbootenv.h
index b078b605db7f..cbc8751dc51b 100644
--- a/include/libzfsbootenv.h
+++ b/include/libzfsbootenv.h
@@ -14,7 +14,7 @@
  */
 
 #ifndef	_LIBZFSBOOTENV_H
-#define	_LIBZFSBOOTENV_H
+#define	_LIBZFSBOOTENV_H extern __attribute__((visibility("default")))
 
 #ifdef __cplusplus
 extern "C" {
@@ -25,14 +25,16 @@ typedef enum lzbe_flags {
 	lzbe_replace	/* replace current nvlist */
 } lzbe_flags_t;
 
-extern int lzbe_nvlist_get(const char *, const char *, void **);
-extern int lzbe_nvlist_set(const char *, const char *, void *);
-extern void lzbe_nvlist_free(void *);
-extern int lzbe_add_pair(void *, const char *, const char *, void *, size_t);
-extern int lzbe_remove_pair(void *, const char *);
-extern int lzbe_set_boot_device(const char *, lzbe_flags_t, const char *);
-extern int lzbe_get_boot_device(const char *, char **);
-extern int lzbe_bootenv_print(const char *, const char *, FILE *);
+_LIBZFSBOOTENV_H int lzbe_nvlist_get(const char *, const char *, void **);
+_LIBZFSBOOTENV_H int lzbe_nvlist_set(const char *, const char *, void *);
+_LIBZFSBOOTENV_H void lzbe_nvlist_free(void *);
+_LIBZFSBOOTENV_H int lzbe_add_pair(void *, const char *, const char *, void *,
+    size_t);
+_LIBZFSBOOTENV_H int lzbe_remove_pair(void *, const char *);
+_LIBZFSBOOTENV_H int lzbe_set_boot_device(const char *, lzbe_flags_t,
+    const char *);
+_LIBZFSBOOTENV_H int lzbe_get_boot_device(const char *, char **);
+_LIBZFSBOOTENV_H int lzbe_bootenv_print(const char *, const char *, FILE *);
 
 #ifdef __cplusplus
 }
diff --git a/include/sys/avl.h b/include/sys/avl.h
index ed3c6f86a568..20e88f2a6b06 100644
--- a/include/sys/avl.h
+++ b/include/sys/avl.h
@@ -28,7 +28,7 @@
  */
 
 #ifndef	_AVL_H
-#define	_AVL_H
+#define	_AVL_H extern __attribute__((visibility("default")))
 
 /*
  * This is a private header file.  Applications should not directly include
@@ -160,7 +160,7 @@ typedef uintptr_t avl_index_t;
  * size   - the value of sizeof(struct my_type)
 * offset - the value of OFFSETOF(struct my_type, my_link)
 */
-extern void avl_create(avl_tree_t *tree,
+_AVL_H void avl_create(avl_tree_t *tree,
 	int (*compar) (const void *, const void *), size_t size, size_t offset);
 
@@ -172,7 +172,7 @@ extern void avl_create(avl_tree_t *tree,
 * node   - node that has the value being looked for
 * where  - position for use with avl_nearest() or avl_insert(), may be NULL
 */
-extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where);
+_AVL_H void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where);
 
 /*
 * Insert a node into the tree.
@@ -180,7 +180,7 @@ extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where);
 * node   - the node to insert
 * where  - position as returned from avl_find()
 */
-extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where);
+_AVL_H void avl_insert(avl_tree_t *tree, void *node, avl_index_t where);
 
 /*
 * Insert "new_data" in "tree" in the given "direction" either after
@@ -193,7 +193,7 @@ extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where);
 * here      - existing node in "tree"
 * direction - either AVL_AFTER or AVL_BEFORE the data "here".
 */
-extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here,
+_AVL_H void avl_insert_here(avl_tree_t *tree, void *new_data, void *here,
     int direction);
 
@@ -202,8 +202,8 @@ extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here,
 * if the tree is empty.
 *
 */
-extern void *avl_first(avl_tree_t *tree);
-extern void *avl_last(avl_tree_t *tree);
+_AVL_H void *avl_first(avl_tree_t *tree);
+_AVL_H void *avl_last(avl_tree_t *tree);
 
@@ -239,7 +239,7 @@ extern void *avl_last(avl_tree_t *tree);
 *	else
 *		less = avl_nearest(tree, where, AVL_BEFORE);
 */
-extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction);
+_AVL_H void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction);
 
@@ -249,7 +249,7 @@ extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction);
 *
 * node   - the node to add
 */
-extern void avl_add(avl_tree_t *tree, void *node);
+_AVL_H void avl_add(avl_tree_t *tree, void *node);
 
@@ -257,7 +257,7 @@ extern void avl_add(avl_tree_t *tree, void *node);
 *
 * node   - the node to remove
 */
-extern void avl_remove(avl_tree_t *tree, void *node);
+_AVL_H void avl_remove(avl_tree_t *tree, void *node);
 
 /*
 * Reinsert a node only if its order has changed relative to its nearest
@@ -266,24 +266,24 @@ extern void avl_remove(avl_tree_t *tree, void *node);
 * avl_update_gt() only if you know the direction in which the order of the
 * node may change.
 */
-extern boolean_t avl_update(avl_tree_t *, void *);
-extern boolean_t avl_update_lt(avl_tree_t *, void *);
-extern boolean_t avl_update_gt(avl_tree_t *, void *);
+_AVL_H boolean_t avl_update(avl_tree_t *, void *);
+_AVL_H boolean_t avl_update_lt(avl_tree_t *, void *);
+_AVL_H boolean_t avl_update_gt(avl_tree_t *, void *);
 
 /*
 * Swaps the contents of the two trees.
 */
-extern void avl_swap(avl_tree_t *tree1, avl_tree_t *tree2);
+_AVL_H void avl_swap(avl_tree_t *tree1, avl_tree_t *tree2);
 
 /*
 * Return the number of nodes in the tree
 */
-extern ulong_t avl_numnodes(avl_tree_t *tree);
+_AVL_H ulong_t avl_numnodes(avl_tree_t *tree);
 
 /*
 * Return B_TRUE if there are zero nodes in the tree, B_FALSE otherwise.
 */
-extern boolean_t avl_is_empty(avl_tree_t *tree);
+_AVL_H boolean_t avl_is_empty(avl_tree_t *tree);
 
 /*
 * Used to destroy any remaining nodes in a tree. The cookie argument should
@@ -306,7 +306,7 @@ extern boolean_t avl_is_empty(avl_tree_t *tree);
 *	    free(node);
 *	avl_destroy(tree);
 */
-extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie);
+_AVL_H void *avl_destroy_nodes(avl_tree_t *tree, void **cookie);
 
@@ -314,7 +314,7 @@ extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie);
 *
 * tree   - the empty tree to destroy
 */
-extern void avl_destroy(avl_tree_t *tree);
+_AVL_H void avl_destroy(avl_tree_t *tree);
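The avl.h comments above describe the contract: an avl_node_t embedded in
the user's structure, a three-way comparator that returns exactly -1, 0,
or +1, and teardown via avl_destroy_nodes(). A compact sketch under those
rules, with an invented structure and key:

    #include <stddef.h>
    #include <stdlib.h>
    #include <sys/avl.h>

    typedef struct my_node {
        int        mn_key;
        avl_node_t mn_link;     /* embedded linkage, found via offsetof */
    } my_node_t;

    static int
    my_cmp(const void *a, const void *b)
    {
        int ka = ((const my_node_t *)a)->mn_key;
        int kb = ((const my_node_t *)b)->mn_key;

        return (ka < kb ? -1 : ka > kb ? 1 : 0);
    }

    int
    main(void)
    {
        avl_tree_t tree;
        my_node_t *n = malloc(sizeof (*n));
        void *cookie = NULL;

        avl_create(&tree, my_cmp, sizeof (my_node_t),
            offsetof(my_node_t, mn_link));
        n->mn_key = 42;
        avl_add(&tree, n);      /* like avl_insert() minus the avl_find() */

        while ((n = avl_destroy_nodes(&tree, &cookie)) != NULL)
            free(n);
        avl_destroy(&tree);
        return (0);
    }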
diff --git a/include/sys/avl_impl.h b/include/sys/avl_impl.h
index fddf76906dee..f577ecd42f7c 100644
--- a/include/sys/avl_impl.h
+++ b/include/sys/avl_impl.h
@@ -25,8 +25,7 @@
  */
 
 #ifndef	_AVL_IMPL_H
-#define	_AVL_IMPL_H
-
+#define	_AVL_IMPL_H extern __attribute__((visibility("default")))
 
 /*
@@ -155,7 +154,7 @@ struct avl_tree {
 /*
  * This will only by used via AVL_NEXT() or AVL_PREV()
  */
-extern void *avl_walk(struct avl_tree *, void *, int);
+_AVL_IMPL_H void *avl_walk(struct avl_tree *, void *, int);
 
 #ifdef __cplusplus
 }
diff --git a/include/sys/efi_partition.h b/include/sys/efi_partition.h
index fbb58013e22a..cda2c98e5d53 100644
--- a/include/sys/efi_partition.h
+++ b/include/sys/efi_partition.h
@@ -24,7 +24,7 @@
  */
 
 #ifndef	_SYS_EFI_PARTITION_H
-#define	_SYS_EFI_PARTITION_H
+#define	_SYS_EFI_PARTITION_H extern __attribute__((visibility("default")))
 
 #include
@@ -363,14 +363,15 @@ struct partition64 {
 #endif
 
 #ifndef _KERNEL
-extern int efi_alloc_and_init(int, uint32_t, struct dk_gpt **);
-extern int efi_alloc_and_read(int, struct dk_gpt **);
-extern int efi_write(int, struct dk_gpt *);
-extern int efi_rescan(int);
-extern void efi_free(struct dk_gpt *);
-extern int efi_type(int);
-extern void efi_err_check(struct dk_gpt *);
-extern int efi_use_whole_disk(int fd);
+_SYS_EFI_PARTITION_H int efi_debug;
+_SYS_EFI_PARTITION_H int efi_alloc_and_init(int, uint32_t, struct dk_gpt **);
+_SYS_EFI_PARTITION_H int efi_alloc_and_read(int, struct dk_gpt **);
+_SYS_EFI_PARTITION_H int efi_write(int, struct dk_gpt *);
+_SYS_EFI_PARTITION_H int efi_rescan(int);
+_SYS_EFI_PARTITION_H void efi_free(struct dk_gpt *);
+_SYS_EFI_PARTITION_H int efi_type(int);
+_SYS_EFI_PARTITION_H void efi_err_check(struct dk_gpt *);
+_SYS_EFI_PARTITION_H int efi_use_whole_disk(int fd);
 #endif
 
 #ifdef __cplusplus
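With efi_debug now declared in efi_partition.h, callers can enable the
library's tracing without writing their own extern declaration. A rough
sketch, assuming efi_alloc_and_read() reports success with a non-negative
return value; the device path is illustrative:

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/efi_partition.h>

    int
    main(void)
    {
        struct dk_gpt *gpt;
        int fd;

        efi_debug = 1;          /* verbose library tracing */
        if ((fd = open("/dev/sda", O_RDONLY)) < 0)
            return (1);
        if (efi_alloc_and_read(fd, &gpt) >= 0)
            efi_free(gpt);      /* efi_alloc_and_read() allocated it */
        (void) close(fd);
        return (0);
    }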
diff --git a/include/sys/nvpair.h b/include/sys/nvpair.h
index b0be8bd7ada1..76d383a3c681 100644
--- a/include/sys/nvpair.h
+++ b/include/sys/nvpair.h
@@ -24,7 +24,7 @@
  */
 
 #ifndef	_SYS_NVPAIR_H
-#define	_SYS_NVPAIR_H
+#define	_SYS_NVPAIR_H extern __attribute__((visibility("default")))
 
 #include
 #include
@@ -135,221 +135,270 @@ struct nv_alloc_ops {
 	void (*nv_ao_reset)(nv_alloc_t *);
 };
 
-extern const nv_alloc_ops_t *nv_fixed_ops;
-extern nv_alloc_t *nv_alloc_nosleep;
+_SYS_NVPAIR_H const nv_alloc_ops_t *nv_fixed_ops;
+_SYS_NVPAIR_H nv_alloc_t *nv_alloc_nosleep;
 
 #if defined(_KERNEL)
-extern nv_alloc_t *nv_alloc_sleep;
-extern nv_alloc_t *nv_alloc_pushpage;
+_SYS_NVPAIR_H nv_alloc_t *nv_alloc_sleep;
+_SYS_NVPAIR_H nv_alloc_t *nv_alloc_pushpage;
 #endif
 
-int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *, /* args */ ...);
-void nv_alloc_reset(nv_alloc_t *);
-void nv_alloc_fini(nv_alloc_t *);
+_SYS_NVPAIR_H int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *,
+    /* args */ ...);
+_SYS_NVPAIR_H void nv_alloc_reset(nv_alloc_t *);
+_SYS_NVPAIR_H void nv_alloc_fini(nv_alloc_t *);
 
 /* list management */
-int nvlist_alloc(nvlist_t **, uint_t, int);
-void nvlist_free(nvlist_t *);
-int nvlist_size(nvlist_t *, size_t *, int);
-int nvlist_pack(nvlist_t *, char **, size_t *, int, int);
-int nvlist_unpack(char *, size_t, nvlist_t **, int);
-int nvlist_dup(nvlist_t *, nvlist_t **, int);
-int nvlist_merge(nvlist_t *, nvlist_t *, int);
-
-uint_t nvlist_nvflag(nvlist_t *);
-
-int nvlist_xalloc(nvlist_t **, uint_t, nv_alloc_t *);
-int nvlist_xpack(nvlist_t *, char **, size_t *, int, nv_alloc_t *);
-int nvlist_xunpack(char *, size_t, nvlist_t **, nv_alloc_t *);
-int nvlist_xdup(nvlist_t *, nvlist_t **, nv_alloc_t *);
-nv_alloc_t *nvlist_lookup_nv_alloc(nvlist_t *);
-
-int nvlist_add_nvpair(nvlist_t *, nvpair_t *);
-int nvlist_add_boolean(nvlist_t *, const char *);
-int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
-int nvlist_add_byte(nvlist_t *, const char *, uchar_t);
-int nvlist_add_int8(nvlist_t *, const char *, int8_t);
-int nvlist_add_uint8(nvlist_t *, const char *, uint8_t);
-int nvlist_add_int16(nvlist_t *, const char *, int16_t);
-int nvlist_add_uint16(nvlist_t *, const char *, uint16_t);
-int nvlist_add_int32(nvlist_t *, const char *, int32_t);
-int nvlist_add_uint32(nvlist_t *, const char *, uint32_t);
-int nvlist_add_int64(nvlist_t *, const char *, int64_t);
-int nvlist_add_uint64(nvlist_t *, const char *, uint64_t);
-int nvlist_add_string(nvlist_t *, const char *, const char *);
-int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
-int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
-int nvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
-int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
-int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
-int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
-int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
-int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
-int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
-int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
-int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
-int nvlist_add_string_array(nvlist_t *, const char *, char *const *, uint_t);
-int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
-int nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t);
+_SYS_NVPAIR_H int nvlist_alloc(nvlist_t **, uint_t, int);
+_SYS_NVPAIR_H void nvlist_free(nvlist_t *);
+_SYS_NVPAIR_H int nvlist_size(nvlist_t *, size_t *, int);
+_SYS_NVPAIR_H int nvlist_pack(nvlist_t *, char **, size_t *, int, int);
+_SYS_NVPAIR_H int nvlist_unpack(char *, size_t, nvlist_t **, int);
+_SYS_NVPAIR_H int nvlist_dup(nvlist_t *, nvlist_t **, int);
+_SYS_NVPAIR_H int nvlist_merge(nvlist_t *, nvlist_t *, int);
+
+_SYS_NVPAIR_H uint_t nvlist_nvflag(nvlist_t *);
+
+_SYS_NVPAIR_H int nvlist_xalloc(nvlist_t **, uint_t, nv_alloc_t *);
+_SYS_NVPAIR_H int nvlist_xpack(nvlist_t *, char **, size_t *, int,
+    nv_alloc_t *);
+_SYS_NVPAIR_H int nvlist_xunpack(char *, size_t, nvlist_t **, nv_alloc_t *);
+_SYS_NVPAIR_H int nvlist_xdup(nvlist_t *, nvlist_t **, nv_alloc_t *);
+_SYS_NVPAIR_H nv_alloc_t *nvlist_lookup_nv_alloc(nvlist_t *);
+
+_SYS_NVPAIR_H int nvlist_add_nvpair(nvlist_t *, nvpair_t *);
+_SYS_NVPAIR_H int nvlist_add_boolean(nvlist_t *, const char *);
+_SYS_NVPAIR_H int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
+_SYS_NVPAIR_H int nvlist_add_byte(nvlist_t *, const char *, uchar_t);
+_SYS_NVPAIR_H int nvlist_add_int8(nvlist_t *, const char *, int8_t);
+_SYS_NVPAIR_H int nvlist_add_uint8(nvlist_t *, const char *, uint8_t);
+_SYS_NVPAIR_H int nvlist_add_int16(nvlist_t *, const char *, int16_t);
+_SYS_NVPAIR_H int nvlist_add_uint16(nvlist_t *, const char *, uint16_t);
+_SYS_NVPAIR_H int nvlist_add_int32(nvlist_t *, const char *, int32_t);
+_SYS_NVPAIR_H int nvlist_add_uint32(nvlist_t *, const char *, uint32_t);
+_SYS_NVPAIR_H int nvlist_add_int64(nvlist_t *, const char *, int64_t);
+_SYS_NVPAIR_H int nvlist_add_uint64(nvlist_t *, const char *, uint64_t);
+_SYS_NVPAIR_H int nvlist_add_string(nvlist_t *, const char *, const char *);
+_SYS_NVPAIR_H int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
+_SYS_NVPAIR_H int nvlist_add_boolean_array(nvlist_t *, const char *,
+    boolean_t *, uint_t);
+_SYS_NVPAIR_H int nvlist_add_byte_array(nvlist_t *, const char *, uchar_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *,
+    uint_t);
+_SYS_NVPAIR_H int nvlist_add_string_array(nvlist_t *, const char *,
+    char * const *, uint_t);
+_SYS_NVPAIR_H int nvlist_add_nvlist_array(nvlist_t *, const char *,
+    nvlist_t **, uint_t);
+_SYS_NVPAIR_H int nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t);
 #if !defined(_KERNEL) && !defined(_STANDALONE)
-int nvlist_add_double(nvlist_t *, const char *, double);
+_SYS_NVPAIR_H int nvlist_add_double(nvlist_t *, const char *, double);
 #endif
 
-int nvlist_remove(nvlist_t *, const char *, data_type_t);
-int nvlist_remove_all(nvlist_t *, const char *);
-int nvlist_remove_nvpair(nvlist_t *, nvpair_t *);
-
-int nvlist_lookup_boolean(nvlist_t *, const char *);
-int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *);
-int nvlist_lookup_byte(nvlist_t *, const char *, uchar_t *);
-int nvlist_lookup_int8(nvlist_t *, const char *, int8_t *);
-int nvlist_lookup_uint8(nvlist_t *, const char *, uint8_t *);
-int nvlist_lookup_int16(nvlist_t *, const char *, int16_t *);
-int nvlist_lookup_uint16(nvlist_t *, const char *, uint16_t *);
-int nvlist_lookup_int32(nvlist_t *, const char *, int32_t *);
-int nvlist_lookup_uint32(nvlist_t *, const char *, uint32_t *);
-int nvlist_lookup_int64(nvlist_t *, const char *, int64_t *);
-int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *);
-int nvlist_lookup_string(nvlist_t *, const char *, char **);
-int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **);
-int nvlist_lookup_boolean_array(nvlist_t *, const char *,
+_SYS_NVPAIR_H int nvlist_remove(nvlist_t *, const char *, data_type_t);
+_SYS_NVPAIR_H int nvlist_remove_all(nvlist_t *, const char *);
+_SYS_NVPAIR_H int nvlist_remove_nvpair(nvlist_t *, nvpair_t *);
+
+_SYS_NVPAIR_H int nvlist_lookup_boolean(nvlist_t *, const char *);
+_SYS_NVPAIR_H int nvlist_lookup_boolean_value(nvlist_t *, const char *,
+    boolean_t *);
+_SYS_NVPAIR_H int nvlist_lookup_byte(nvlist_t *, const char *, uchar_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int8(nvlist_t *, const char *, int8_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint8(nvlist_t *, const char *, uint8_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int16(nvlist_t *, const char *, int16_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint16(nvlist_t *, const char *, uint16_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int32(nvlist_t *, const char *, int32_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint32(nvlist_t *, const char *, uint32_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int64(nvlist_t *, const char *, int64_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *);
+_SYS_NVPAIR_H int nvlist_lookup_string(nvlist_t *, const char *, char **);
+_SYS_NVPAIR_H int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **);
+_SYS_NVPAIR_H int nvlist_lookup_boolean_array(nvlist_t *, const char *,
     boolean_t **, uint_t *);
-int nvlist_lookup_byte_array(nvlist_t *, const char *, uchar_t **, uint_t *);
-int nvlist_lookup_int8_array(nvlist_t *, const char *, int8_t **, uint_t *);
-int nvlist_lookup_uint8_array(nvlist_t *, const char *, uint8_t **, uint_t *);
-int nvlist_lookup_int16_array(nvlist_t *, const char *, int16_t **, uint_t *);
-int nvlist_lookup_uint16_array(nvlist_t *, const char *, uint16_t **, uint_t *);
-int nvlist_lookup_int32_array(nvlist_t *, const char *, int32_t **, uint_t *);
-int nvlist_lookup_uint32_array(nvlist_t *, const char *, uint32_t **, uint_t *);
-int nvlist_lookup_int64_array(nvlist_t *, const char *, int64_t **, uint_t *);
-int nvlist_lookup_uint64_array(nvlist_t *, const char *, uint64_t **, uint_t *);
-int nvlist_lookup_string_array(nvlist_t *, const char *, char ***, uint_t *);
-int nvlist_lookup_nvlist_array(nvlist_t *, const char *,
+_SYS_NVPAIR_H int nvlist_lookup_byte_array(nvlist_t *, const char *, uchar_t **,
+    uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int8_array(nvlist_t *, const char *, int8_t **,
+    uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint8_array(nvlist_t *, const char *,
+    uint8_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int16_array(nvlist_t *, const char *,
+    int16_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint16_array(nvlist_t *, const char *,
+    uint16_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int32_array(nvlist_t *, const char *,
+    int32_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint32_array(nvlist_t *, const char *,
+    uint32_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_int64_array(nvlist_t *, const char *,
+    int64_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_uint64_array(nvlist_t *, const char *,
+    uint64_t **, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_string_array(nvlist_t *, const char *,
+    char ***, uint_t *);
+_SYS_NVPAIR_H int nvlist_lookup_nvlist_array(nvlist_t *, const char *,
     nvlist_t ***, uint_t *);
-int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *);
-int nvlist_lookup_pairs(nvlist_t *, int, ...);
+_SYS_NVPAIR_H int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *);
+_SYS_NVPAIR_H int nvlist_lookup_pairs(nvlist_t *, int, ...);
 #if !defined(_KERNEL) && !defined(_STANDALONE)
-int nvlist_lookup_double(nvlist_t *, const char *, double *);
+_SYS_NVPAIR_H int nvlist_lookup_double(nvlist_t *, const char *, double *);
 #endif
 
-int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **);
-int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *, nvpair_t **,
-    int *, char **);
-boolean_t nvlist_exists(nvlist_t *, const char *);
-boolean_t nvlist_empty(nvlist_t *);
+_SYS_NVPAIR_H int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **);
+_SYS_NVPAIR_H int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *,
+    nvpair_t **, int *, char **);
+_SYS_NVPAIR_H boolean_t nvlist_exists(nvlist_t *, const char *);
+_SYS_NVPAIR_H boolean_t nvlist_empty(nvlist_t *);
 
 /* processing nvpair */
-nvpair_t *nvlist_next_nvpair(nvlist_t *, nvpair_t *);
-nvpair_t *nvlist_prev_nvpair(nvlist_t *, nvpair_t *);
-char *nvpair_name(nvpair_t *);
-data_type_t nvpair_type(nvpair_t *);
-int nvpair_type_is_array(nvpair_t *);
-int nvpair_value_boolean_value(nvpair_t *, boolean_t *);
-int nvpair_value_byte(nvpair_t *, uchar_t *);
-int nvpair_value_int8(nvpair_t *, int8_t *);
-int nvpair_value_uint8(nvpair_t *, uint8_t *);
-int nvpair_value_int16(nvpair_t *, int16_t *);
-int nvpair_value_uint16(nvpair_t *, uint16_t *);
-int nvpair_value_int32(nvpair_t *, int32_t *);
-int nvpair_value_uint32(nvpair_t *, uint32_t *);
-int nvpair_value_int64(nvpair_t *, int64_t *);
-int nvpair_value_uint64(nvpair_t *, uint64_t *);
-int nvpair_value_string(nvpair_t *, char **);
-int nvpair_value_nvlist(nvpair_t *, nvlist_t **);
-int nvpair_value_boolean_array(nvpair_t *, boolean_t **, uint_t *);
-int nvpair_value_byte_array(nvpair_t *, uchar_t **, uint_t *);
-int nvpair_value_int8_array(nvpair_t *, int8_t **, uint_t *);
-int nvpair_value_uint8_array(nvpair_t *, uint8_t **, uint_t *);
-int nvpair_value_int16_array(nvpair_t *, int16_t **, uint_t *);
-int nvpair_value_uint16_array(nvpair_t *, uint16_t **, uint_t *);
-int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *);
-int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *);
-int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *);
-int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *);
-int nvpair_value_string_array(nvpair_t *, char ***, uint_t *);
-int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *);
-int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
+_SYS_NVPAIR_H nvpair_t *nvlist_next_nvpair(nvlist_t *, nvpair_t *);
+_SYS_NVPAIR_H nvpair_t *nvlist_prev_nvpair(nvlist_t *, nvpair_t *);
+_SYS_NVPAIR_H char *nvpair_name(nvpair_t *);
+_SYS_NVPAIR_H data_type_t nvpair_type(nvpair_t *);
+_SYS_NVPAIR_H int nvpair_type_is_array(nvpair_t *);
+_SYS_NVPAIR_H int nvpair_value_boolean_value(nvpair_t *, boolean_t *);
+_SYS_NVPAIR_H int nvpair_value_byte(nvpair_t *, uchar_t *);
+_SYS_NVPAIR_H int nvpair_value_int8(nvpair_t *, int8_t *);
+_SYS_NVPAIR_H int nvpair_value_uint8(nvpair_t *, uint8_t *);
+_SYS_NVPAIR_H int nvpair_value_int16(nvpair_t *, int16_t *);
+_SYS_NVPAIR_H int nvpair_value_uint16(nvpair_t *, uint16_t *);
+_SYS_NVPAIR_H int nvpair_value_int32(nvpair_t *, int32_t *);
+_SYS_NVPAIR_H int nvpair_value_uint32(nvpair_t *, uint32_t *);
+_SYS_NVPAIR_H int nvpair_value_int64(nvpair_t *, int64_t *);
+_SYS_NVPAIR_H int nvpair_value_uint64(nvpair_t *, uint64_t *);
+_SYS_NVPAIR_H int nvpair_value_string(nvpair_t *, char **);
+_SYS_NVPAIR_H int nvpair_value_nvlist(nvpair_t *, nvlist_t **);
+_SYS_NVPAIR_H int nvpair_value_boolean_array(nvpair_t *, boolean_t **,
+    uint_t *);
+_SYS_NVPAIR_H int nvpair_value_byte_array(nvpair_t *, uchar_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_int8_array(nvpair_t *, int8_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_uint8_array(nvpair_t *, uint8_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_int16_array(nvpair_t *, int16_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_uint16_array(nvpair_t *, uint16_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_string_array(nvpair_t *, char ***, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
 #if !defined(_KERNEL) && !defined(_STANDALONE)
-int nvpair_value_double(nvpair_t *, double *);
+_SYS_NVPAIR_H int nvpair_value_double(nvpair_t *, double *);
 #endif
 
-nvlist_t *fnvlist_alloc(void);
-void fnvlist_free(nvlist_t *);
-size_t fnvlist_size(nvlist_t *);
-char *fnvlist_pack(nvlist_t *, size_t *);
-void fnvlist_pack_free(char *, size_t);
-nvlist_t *fnvlist_unpack(char *, size_t);
-nvlist_t *fnvlist_dup(nvlist_t *);
-void fnvlist_merge(nvlist_t *, nvlist_t *);
-size_t fnvlist_num_pairs(nvlist_t *);
-
-void fnvlist_add_boolean(nvlist_t *, const char *);
-void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
-void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
-void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
-void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
-void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
-void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
-void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
-void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
-void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
-void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
-void fnvlist_add_string(nvlist_t *, const char *, const char *);
-void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
-void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
-void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
-void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
-void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
-void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
-void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
-void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
-void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
-void fnvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
-void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
-void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
-void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t);
-void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
-
-void fnvlist_remove(nvlist_t *, const char *);
-void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
-
-nvpair_t *fnvlist_lookup_nvpair(nvlist_t *, const char *);
-boolean_t fnvlist_lookup_boolean(nvlist_t *, const char *);
-boolean_t fnvlist_lookup_boolean_value(nvlist_t *, const char *);
-uchar_t fnvlist_lookup_byte(nvlist_t *, const char *);
-int8_t fnvlist_lookup_int8(nvlist_t *, const char *);
-int16_t fnvlist_lookup_int16(nvlist_t *, const char *);
-int32_t fnvlist_lookup_int32(nvlist_t *, const char *);
-int64_t fnvlist_lookup_int64(nvlist_t *, const char *);
-uint8_t fnvlist_lookup_uint8(nvlist_t *, const char *);
-uint16_t fnvlist_lookup_uint16(nvlist_t *, const char *);
-uint32_t fnvlist_lookup_uint32(nvlist_t *, const char *);
-uint64_t fnvlist_lookup_uint64(nvlist_t *, const char *);
-char *fnvlist_lookup_string(nvlist_t *, const char *);
-nvlist_t *fnvlist_lookup_nvlist(nvlist_t *, const char *);
-boolean_t *fnvlist_lookup_boolean_array(nvlist_t *, const char *, uint_t *);
-uchar_t *fnvlist_lookup_byte_array(nvlist_t *, const char *, uint_t *);
-int8_t *fnvlist_lookup_int8_array(nvlist_t *, const char *, uint_t *);
-uint8_t *fnvlist_lookup_uint8_array(nvlist_t *, const char *, uint_t *);
-int16_t *fnvlist_lookup_int16_array(nvlist_t *, const char *, uint_t *);
-uint16_t *fnvlist_lookup_uint16_array(nvlist_t *, const char *, uint_t *);
-int32_t *fnvlist_lookup_int32_array(nvlist_t *, const char *, uint_t *);
-uint32_t *fnvlist_lookup_uint32_array(nvlist_t *, const char *, uint_t *);
-int64_t *fnvlist_lookup_int64_array(nvlist_t *, const char *, uint_t *);
-uint64_t *fnvlist_lookup_uint64_array(nvlist_t *, const char *, uint_t *);
-
-boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
-uchar_t fnvpair_value_byte(nvpair_t *nvp);
-int8_t fnvpair_value_int8(nvpair_t *nvp);
-int16_t fnvpair_value_int16(nvpair_t *nvp);
-int32_t fnvpair_value_int32(nvpair_t *nvp);
-int64_t fnvpair_value_int64(nvpair_t *nvp);
-uint8_t fnvpair_value_uint8(nvpair_t *nvp);
-uint16_t fnvpair_value_uint16(nvpair_t *nvp);
-uint32_t fnvpair_value_uint32(nvpair_t *nvp);
-uint64_t fnvpair_value_uint64(nvpair_t *nvp);
-char *fnvpair_value_string(nvpair_t *nvp);
-nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
+_SYS_NVPAIR_H nvlist_t *fnvlist_alloc(void);
+_SYS_NVPAIR_H void fnvlist_free(nvlist_t *);
+_SYS_NVPAIR_H size_t fnvlist_size(nvlist_t *);
+_SYS_NVPAIR_H char *fnvlist_pack(nvlist_t *, size_t *);
+_SYS_NVPAIR_H void fnvlist_pack_free(char *, size_t);
+_SYS_NVPAIR_H nvlist_t *fnvlist_unpack(char *, size_t);
+_SYS_NVPAIR_H nvlist_t *fnvlist_dup(nvlist_t *);
+_SYS_NVPAIR_H void fnvlist_merge(nvlist_t *, nvlist_t *);
+_SYS_NVPAIR_H size_t fnvlist_num_pairs(nvlist_t *);
+
+_SYS_NVPAIR_H void fnvlist_add_boolean(nvlist_t *, const char *);
+_SYS_NVPAIR_H void fnvlist_add_boolean_value(nvlist_t *, const char *,
+    boolean_t);
+_SYS_NVPAIR_H void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
+_SYS_NVPAIR_H void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
+_SYS_NVPAIR_H void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
+_SYS_NVPAIR_H void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
+_SYS_NVPAIR_H void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
+_SYS_NVPAIR_H void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
+_SYS_NVPAIR_H void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
+_SYS_NVPAIR_H void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
+_SYS_NVPAIR_H void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
+_SYS_NVPAIR_H void fnvlist_add_string(nvlist_t *, const char *, const char *);
+_SYS_NVPAIR_H void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
+_SYS_NVPAIR_H void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
+_SYS_NVPAIR_H void fnvlist_add_boolean_array(nvlist_t *, const char *,
+    boolean_t *, uint_t);
+_SYS_NVPAIR_H void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *,
+    uint_t);
+_SYS_NVPAIR_H void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *,
+    uint_t);
+_SYS_NVPAIR_H void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *,
+    uint_t);
+_SYS_NVPAIR_H void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *,
+    uint_t);
+_SYS_NVPAIR_H void fnvlist_add_uint16_array(nvlist_t *, const char *,
+    uint16_t *, uint_t);
+_SYS_NVPAIR_H void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *,
+    uint_t);
+_SYS_NVPAIR_H void fnvlist_add_uint32_array(nvlist_t *, const char *,
+    uint32_t *, uint_t);
+_SYS_NVPAIR_H void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *,
+    uint_t);
+_SYS_NVPAIR_H void fnvlist_add_uint64_array(nvlist_t *, const char *,
+    uint64_t *, uint_t);
+_SYS_NVPAIR_H void fnvlist_add_string_array(nvlist_t *, const char *,
+    char * const *, uint_t);
+_SYS_NVPAIR_H void fnvlist_add_nvlist_array(nvlist_t *, const char *,
+    nvlist_t **, uint_t);
+
+_SYS_NVPAIR_H void fnvlist_remove(nvlist_t *, const char *);
+_SYS_NVPAIR_H void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
+
+_SYS_NVPAIR_H nvpair_t *fnvlist_lookup_nvpair(nvlist_t *, const char *);
+_SYS_NVPAIR_H boolean_t fnvlist_lookup_boolean(nvlist_t *, const char *);
+_SYS_NVPAIR_H boolean_t fnvlist_lookup_boolean_value(nvlist_t *, const char *);
+_SYS_NVPAIR_H uchar_t fnvlist_lookup_byte(nvlist_t *, const char *);
+_SYS_NVPAIR_H int8_t fnvlist_lookup_int8(nvlist_t *, const char *);
+_SYS_NVPAIR_H int16_t fnvlist_lookup_int16(nvlist_t *, const char *);
+_SYS_NVPAIR_H int32_t fnvlist_lookup_int32(nvlist_t *, const char *);
+_SYS_NVPAIR_H int64_t fnvlist_lookup_int64(nvlist_t *, const char *);
+_SYS_NVPAIR_H uint8_t fnvlist_lookup_uint8(nvlist_t *, const char *);
+_SYS_NVPAIR_H uint16_t fnvlist_lookup_uint16(nvlist_t *, const char *);
+_SYS_NVPAIR_H uint32_t fnvlist_lookup_uint32(nvlist_t *, const char *);
+_SYS_NVPAIR_H uint64_t fnvlist_lookup_uint64(nvlist_t *, const char *);
+_SYS_NVPAIR_H char *fnvlist_lookup_string(nvlist_t *, const char *);
+_SYS_NVPAIR_H nvlist_t *fnvlist_lookup_nvlist(nvlist_t *, const char *);
+_SYS_NVPAIR_H boolean_t *fnvlist_lookup_boolean_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H uchar_t *fnvlist_lookup_byte_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H int8_t *fnvlist_lookup_int8_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H uint8_t *fnvlist_lookup_uint8_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H int16_t *fnvlist_lookup_int16_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H uint16_t *fnvlist_lookup_uint16_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H int32_t *fnvlist_lookup_int32_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H uint32_t *fnvlist_lookup_uint32_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H int64_t *fnvlist_lookup_int64_array(nvlist_t *, const char *,
+    uint_t *);
+_SYS_NVPAIR_H uint64_t *fnvlist_lookup_uint64_array(nvlist_t *, const char *,
+    uint_t *);
+
+_SYS_NVPAIR_H boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
+_SYS_NVPAIR_H uchar_t fnvpair_value_byte(nvpair_t *nvp);
+_SYS_NVPAIR_H int8_t fnvpair_value_int8(nvpair_t *nvp);
+_SYS_NVPAIR_H int16_t fnvpair_value_int16(nvpair_t *nvp);
+_SYS_NVPAIR_H int32_t fnvpair_value_int32(nvpair_t *nvp);
+_SYS_NVPAIR_H int64_t fnvpair_value_int64(nvpair_t *nvp);
+_SYS_NVPAIR_H uint8_t fnvpair_value_uint8(nvpair_t *nvp);
+_SYS_NVPAIR_H uint16_t fnvpair_value_uint16(nvpair_t *nvp);
+_SYS_NVPAIR_H uint32_t fnvpair_value_uint32(nvpair_t *nvp);
+_SYS_NVPAIR_H uint64_t fnvpair_value_uint64(nvpair_t *nvp);
+_SYS_NVPAIR_H char *fnvpair_value_string(nvpair_t *nvp);
+_SYS_NVPAIR_H nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
 
 #ifdef __cplusplus
 }
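The fnvlist_* declarations above are the abort-on-error counterparts of
the nvlist_* functions, so a consumer can skip error checking entirely; a
minimal round-trip, with an arbitrary name and value:

    #include <sys/nvpair.h>

    int
    main(void)
    {
        nvlist_t *nvl = fnvlist_alloc();        /* aborts instead of failing */
        uint64_t v;

        fnvlist_add_uint64(nvl, "answer", 42);
        v = fnvlist_lookup_uint64(nvl, "answer");
        fnvlist_free(nvl);
        return (v == 42 ? 0 : 1);
    }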
diff --git a/include/thread_pool.h b/include/thread_pool.h
index 57266f11c5a6..43090c3c6644 100644
--- a/include/thread_pool.h
+++ b/include/thread_pool.h
@@ -25,7 +25,7 @@
  */
 
 #ifndef	_THREAD_POOL_H_
-#define	_THREAD_POOL_H_
+#define	_THREAD_POOL_H_ extern __attribute__((visibility("default")))
 
 #include
 #include
@@ -37,33 +37,17 @@ extern "C" {
 
 typedef struct tpool tpool_t;	/* opaque thread pool descriptor */
 
-#if defined(__STDC__)
-
-extern	tpool_t	*tpool_create(uint_t min_threads, uint_t max_threads,
+_THREAD_POOL_H_ tpool_t *tpool_create(uint_t min_threads, uint_t max_threads,
 			uint_t linger, pthread_attr_t *attr);
-extern	int	tpool_dispatch(tpool_t *tpool,
+_THREAD_POOL_H_ int tpool_dispatch(tpool_t *tpool,
 			void (*func)(void *), void *arg);
-extern	void	tpool_destroy(tpool_t *tpool);
-extern	void	tpool_abandon(tpool_t *tpool);
-extern	void	tpool_wait(tpool_t *tpool);
-extern	void	tpool_suspend(tpool_t *tpool);
-extern	int	tpool_suspended(tpool_t *tpool);
-extern	void	tpool_resume(tpool_t *tpool);
-extern	int	tpool_member(tpool_t *tpool);
-
-#else	/* Non ANSI */
-
-extern	tpool_t	*tpool_create();
-extern	int	tpool_dispatch();
-extern	void	tpool_destroy();
-extern	void	tpool_abandon();
-extern	void	tpool_wait();
-extern	void	tpool_suspend();
-extern	int	tpool_suspended();
-extern	void	tpool_resume();
-extern	int	tpool_member();
-
-#endif	/* __STDC__ */
+_THREAD_POOL_H_ void tpool_destroy(tpool_t *tpool);
+_THREAD_POOL_H_ void tpool_abandon(tpool_t *tpool);
+_THREAD_POOL_H_ void tpool_wait(tpool_t *tpool);
+_THREAD_POOL_H_ void tpool_suspend(tpool_t *tpool);
+_THREAD_POOL_H_ int tpool_suspended(tpool_t *tpool);
+_THREAD_POOL_H_ void tpool_resume(tpool_t *tpool);
+_THREAD_POOL_H_ int tpool_member(tpool_t *tpool);
 
 #ifdef __cplusplus
 }
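The thread-pool API that remains after the pre-ANSI purge above is small
enough to show whole; a sketch of dispatch-and-wait usage, with an
arbitrary job payload:

    #include <stdio.h>
    #include <thread_pool.h>

    static void
    work(void *arg)
    {
        printf("job %d\n", *(int *)arg);
    }

    int
    main(void)
    {
        /* at least 1 thread, at most 4; idle threads exit immediately */
        tpool_t *tp = tpool_create(1, 4, 0, NULL);
        int ids[8];

        for (int i = 0; i < 8; i++) {
            ids[i] = i;
            (void) tpool_dispatch(tp, work, &ids[i]);
        }
        tpool_wait(tp);         /* block until the queue drains */
        tpool_destroy(tp);
        return (0);
    }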
-#endif	/* defined(i386) */
-
-	{ V_UNASSIGNED, 0 },		/* k - 10 */
-	{ V_UNASSIGNED, 0 },		/* l - 11 */
-	{ V_UNASSIGNED, 0 },		/* m - 12 */
-	{ V_UNASSIGNED, 0 },		/* n - 13 */
-	{ V_UNASSIGNED, 0 },		/* o - 14 */
-	{ V_UNASSIGNED, 0 },		/* p - 15 */
-#endif	/* defined(_SUNOS_VTOC_16) */
-};
-
 int efi_debug = 0;
 static int efi_read(int, struct dk_gpt *);

diff --git a/lib/libnvpair/Makefile.am b/lib/libnvpair/Makefile.am
index a3e1fa307f7c..7b9ebebe7906 100644
--- a/lib/libnvpair/Makefile.am
+++ b/lib/libnvpair/Makefile.am
@@ -7,6 +7,7 @@ VPATH = \
 # Includes kernel code, generate warnings for large stack frames
 # and required CFLAGS for libtirpc
 AM_CFLAGS += $(FRAME_LARGER_THAN) $(LIBTIRPC_CFLAGS)
+AM_CFLAGS += -fvisibility=hidden
 
 lib_LTLIBRARIES = libnvpair.la
diff --git a/lib/libnvpair/libnvpair.abi b/lib/libnvpair/libnvpair.abi
index 697a67b7fc61..8c503fecd152 100644
--- a/lib/libnvpair/libnvpair.abi
+++ b/lib/libnvpair/libnvpair.abi
[libnvpair.abi hunk bodies unrecoverable: the libabigail XML markup was stripped when this archive was extracted, leaving only diff markers; the hunks refresh the recorded ABI (exported symbols and types) for the switch to -fvisibility=hidden.]
diff --git a/lib/libshare/Makefile.am b/lib/libshare/Makefile.am
index e42609c6496d..dff3e5382d6e 100644
--- a/lib/libshare/Makefile.am
+++ b/lib/libshare/Makefile.am
@@ -2,6 +2,8 @@ include $(top_srcdir)/config/Rules.am
 
 DEFAULT_INCLUDES += -I$(srcdir)
 
+AM_CFLAGS += -fvisibility=hidden
+
 noinst_LTLIBRARIES = libshare.la
 
 USER_C = \
diff --git a/lib/libshare/nfs.c b/lib/libshare/nfs.c
index 44d3e93d42f0..e339ebc81f1b 100644
--- a/lib/libshare/nfs.c
+++ b/lib/libshare/nfs.c
@@ -119,7 +119,7 @@ nfs_fini_tmpfile(const char *exports, char *tmpfile)
 	return (SA_OK);
 }
 
-__attribute__((visibility("hidden"))) int
+int
 nfs_toggle_share(const char *lockfile, const char *exports,
     const char *expdir, sa_share_impl_t impl_share,
     int(*cbk)(sa_share_impl_t impl_share, char *filename))
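The idea behind these -fvisibility=hidden additions, sketched outside the patch: building a shared library with -fvisibility=hidden hides every symbol by default, so only declarations carrying an explicit default-visibility attribute (as the libshare.h hunk below arranges through its _LIBSPL_LIBSHARE_H macro) stay exported. A minimal, self-contained sketch; the file and function names are illustrative and not part of this series:

/* visibility_demo.c -- illustrative only; build with:
 *   cc -fPIC -shared -fvisibility=hidden visibility_demo.c -o libdemo.so
 */

/* Not annotated and not static: still hidden by default under
 * -fvisibility=hidden, so it can be called from other objects inside the
 * library but does not appear in its exported ABI. */
int
demo_internal_helper(int x)
{
	return (x + 1);
}

/* Explicitly re-exported: survives in the dynamic symbol table. */
__attribute__((visibility("default"))) int
demo_public_entry(int x)
{
	return (demo_internal_helper(x) * 2);
}

Afterwards, nm -D --defined-only libdemo.so should list demo_public_entry but not demo_internal_helper, which is the property the regenerated .abi files in this commit record.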
diff --git a/lib/libshare/os/freebsd/nfs.c b/lib/libshare/os/freebsd/nfs.c
index b82491f2ebe8..0041bc228bb5 100644
--- a/lib/libshare/os/freebsd/nfs.c
+++ b/lib/libshare/os/freebsd/nfs.c
@@ -147,7 +147,7 @@ translate_opts(const char *shareopts)
  * This function copies all entries from the exports file to "filename",
  * omitting any entries for the specified mountpoint.
  */
-__attribute__((visibility("hidden"))) int
+int
 nfs_copy_entries(char *filename, const char *mountpoint)
 {
 	int error = SA_OK;
diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
index c236f25698f2..bd578adeec5d 100644
--- a/lib/libshare/os/linux/nfs.c
+++ b/lib/libshare/os/linux/nfs.c
@@ -400,7 +400,7 @@ nfs_add_entry(const char *filename, const char *sharepath,
  * This function copies all entries from the exports file to "filename",
  * omitting any entries for the specified mountpoint.
  */
-__attribute__((visibility("hidden"))) int
+int
 nfs_copy_entries(char *filename, const char *mountpoint)
 {
 	char *buf = NULL;
diff --git a/lib/libspl/include/libshare.h b/lib/libspl/include/libshare.h
index ea53f8c15089..5d06b163a3ba 100644
--- a/lib/libspl/include/libshare.h
+++ b/lib/libspl/include/libshare.h
@@ -25,7 +25,7 @@
  * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
  */
 #ifndef _LIBSPL_LIBSHARE_H
-#define _LIBSPL_LIBSHARE_H
+#define _LIBSPL_LIBSHARE_H extern __attribute__((visibility("default")))
 
 /* API Initialization */
 #define SA_INIT_SHARE_API 0x0001 /* init share specific interface */
@@ -71,16 +71,16 @@
 #define SA_SHARE_EXISTS 33 /* path or file is already shared */
 
 /* initialization */
-extern char *sa_errorstr(int);
+_LIBSPL_LIBSHARE_H char *sa_errorstr(int);
 
 /* share control */
-extern int sa_enable_share(const char *, const char *, const char *,
+_LIBSPL_LIBSHARE_H int sa_enable_share(const char *, const char *, const char *,
     char *);
-extern int sa_disable_share(const char *, char *);
-extern boolean_t sa_is_shared(const char *, char *);
-extern void sa_commit_shares(const char *);
+_LIBSPL_LIBSHARE_H int sa_disable_share(const char *, char *);
+_LIBSPL_LIBSHARE_H boolean_t sa_is_shared(const char *, char *);
+_LIBSPL_LIBSHARE_H void sa_commit_shares(const char *);
 
 /* protocol specific interfaces */
-extern int sa_validate_shareopts(char *, char *);
+_LIBSPL_LIBSHARE_H int sa_validate_shareopts(char *, char *);
 
 #endif /* _LIBSPL_LIBSHARE_H */
diff --git a/lib/libtpool/Makefile.am b/lib/libtpool/Makefile.am
index aa8bde32f963..40fd137b4335 100644
--- a/lib/libtpool/Makefile.am
+++ b/lib/libtpool/Makefile.am
@@ -1,5 +1,7 @@
 include $(top_srcdir)/config/Rules.am
 
+AM_CFLAGS += -fvisibility=hidden
+
 noinst_LTLIBRARIES = libtpool.la
 
 USER_C = \
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 3dd8b2b14510..9a1d95d96ce9 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
[libzfs.abi hunk bodies likewise unrecoverable: libabigail XML markup stripped in extraction; the hunks refresh the recorded set of exported symbols and types.]
diff --git a/lib/libzfs_core/Makefile.am b/lib/libzfs_core/Makefile.am
index 67e554dc8706..b2101e21144d 100644
--- a/lib/libzfs_core/Makefile.am
+++ b/lib/libzfs_core/Makefile.am
@@ -2,6 +2,8 @@ include $(top_srcdir)/config/Rules.am
 
 pkgconfig_DATA = libzfs_core.pc
 
+AM_CFLAGS += -fvisibility=hidden
+
 lib_LTLIBRARIES = libzfs_core.la
 
 include $(top_srcdir)/config/Abigail.am
diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi
index 79567d1504db..f397929e1afe 100644
--- a/lib/libzfs_core/libzfs_core.abi
+++ b/lib/libzfs_core/libzfs_core.abi
[libzfs_core.abi hunk bodies likewise unrecoverable: libabigail XML markup stripped in extraction.]
diff --git a/lib/libzfsbootenv/Makefile.am b/lib/libzfsbootenv/Makefile.am
index 984df0b8a353..0c454a5e031b 100644
--- a/lib/libzfsbootenv/Makefile.am
+++ b/lib/libzfsbootenv/Makefile.am
@@ -2,6 +2,8 @@ include $(top_srcdir)/config/Rules.am
 
 pkgconfig_DATA = libzfsbootenv.pc
 
+AM_CFLAGS += -fvisibility=hidden
+
 lib_LTLIBRARIES = libzfsbootenv.la
 
 include $(top_srcdir)/config/Abigail.am
diff --git a/lib/libzutil/Makefile.am b/lib/libzutil/Makefile.am
index 2f0357e9f900..0bc29f05e0fb 100644
--- a/lib/libzutil/Makefile.am
+++ b/lib/libzutil/Makefile.am
@@ -3,6 +3,7 @@ include $(top_srcdir)/config/Rules.am
 # Suppress unused but set variable warnings often due to ASSERTs
 AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
 AM_CFLAGS += $(LIBBLKID_CFLAGS) $(LIBUDEV_CFLAGS)
+AM_CFLAGS += -fvisibility=hidden
 
 DEFAULT_INCLUDES += -I$(srcdir)
-- cgit v1.2.3


From 860051f1d1ef7ee995188b852d8da36bce85b1dc Mon Sep 17 00:00:00 2001
From: Rich Ercolani <214141+rincebrain@users.noreply.github.com>
Date: Wed, 9 Jun 2021 20:57:57 -0400
Subject: Added error for writing to /dev/ on Linux
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting in Linux 5.10, a zfs send stream written directly to
/dev/{null,zero} errors out. Prefer to inform people when this happens
rather than hoping they guess what's wrong.
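A self-contained sketch of the device-number test this patch wires into zfs_main.c below; the file name is illustrative, and the S_ISCHR guard and fstat error check are extra belt-and-suspenders steps, not claims about the patch itself. On Linux, /dev/null and /dev/zero are character devices 1:3 and 1:5.

/* devcheck.c -- illustrative only; build with: cc devcheck.c -o devcheck */
#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>	/* major()/minor() on glibc */
#include <unistd.h>

int
main(void)
{
	struct stat st;

	if (fstat(STDOUT_FILENO, &st) != 0) {
		perror("fstat");
		return (1);
	}
	/* /dev/null is char 1:3 and /dev/zero is char 1:5 on Linux. */
	if (S_ISCHR(st.st_mode) && major(st.st_rdev) == 1 &&
	    (minor(st.st_rdev) == 3 || minor(st.st_rdev) == 5))
		(void) fprintf(stderr, "stdout is /dev/null or /dev/zero\n");
	return (0);
}

Running ./devcheck > /dev/null triggers the notice, while ./devcheck | cat > /dev/null does not, since stdout is then a pipe; that is exactly why the patch suggests interposing cat as a workaround.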
Reviewed-by: Antonio Russo
Reviewed-by: Ahelenia Ziemiańska
Reviewed-by: Brian Behlendorf
Reviewed-by: John Kennedy
Signed-off-by: Rich Ercolani
Closes: #11991
---
 cmd/zfs/zfs_main.c           | 38 ++++++++++++++++++++++++++++++++++++--
 lib/libzfs/libzfs_sendrecv.c |  1 +
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index c583053dbaec..38bfdc91aeaf 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -728,6 +728,32 @@ finish_progress(char *done)
 	pt_header = NULL;
 }
 
+/* This function checks if the passed fd refers to /dev/null or /dev/zero */
+#ifdef __linux__
+static boolean_t
+is_dev_nullzero(int fd)
+{
+	struct stat st;
+	fstat(fd, &st);
+	return (major(st.st_rdev) == 1 && (minor(st.st_rdev) == 3 /* null */ ||
+	    minor(st.st_rdev) == 5 /* zero */));
+}
+#endif
+
+static void
+note_dev_error(int err, int fd)
+{
+#ifdef __linux__
+	if (err == EINVAL && is_dev_nullzero(fd)) {
+		(void) fprintf(stderr,
+		    gettext("Error: Writing directly to /dev/{null,zero} files"
+		    " on certain kernels is not currently implemented.\n"
+		    "(As a workaround, "
+		    "try \"zfs send [...] | cat > /dev/null\")\n"));
+	}
+#endif
+}
+
 static int
 zfs_mount_and_share(libzfs_handle_t *hdl, const char *dataset, zfs_type_t type)
 {
@@ -4572,11 +4598,16 @@ zfs_do_send(int argc, char **argv)
 
 		err = zfs_send_saved(zhp, &flags, STDOUT_FILENO,
 		    resume_token);
+		if (err != 0)
+			note_dev_error(errno, STDOUT_FILENO);
 		zfs_close(zhp);
 		return (err != 0);
 	} else if (resume_token != NULL) {
-		return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO,
-		    resume_token));
+		err = zfs_send_resume(g_zfs, &flags, STDOUT_FILENO,
+		    resume_token);
+		if (err != 0)
+			note_dev_error(errno, STDOUT_FILENO);
+		return (err);
 	}
 
 	if (flags.skipmissing && !flags.replicate) {
@@ -4627,6 +4658,8 @@ zfs_do_send(int argc, char **argv)
 		err = zfs_send_one(zhp, fromname, STDOUT_FILENO, &flags,
 		    redactbook);
 		zfs_close(zhp);
+		if (err != 0)
+			note_dev_error(errno, STDOUT_FILENO);
 		return (err != 0);
 	}
 
@@ -4703,6 +4736,7 @@ zfs_do_send(int argc, char **argv)
 		nvlist_free(dbgnv);
 	}
 	zfs_close(zhp);
+	note_dev_error(errno, STDOUT_FILENO);
 	return (err != 0);
 }
 
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 2ba673fd09f7..136255786cc1 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -849,6 +849,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
 	case ERANGE:
 	case EFAULT:
 	case EROFS:
+	case EINVAL:
 		zfs_error_aux(hdl, "%s", strerror(errno));
 		return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
-- cgit v1.2.3


From 371f88d96fe0aeb46a72fec78f90e1d777493ee5 Mon Sep 17 00:00:00 2001
From: Alexander Motin
Date: Thu, 10 Jun 2021 11:27:33 -0400
Subject: Remove pool io kstats (#12212)

This mostly reverts the "3537 want pool io kstats" commit of 8 years ago.

On one hand, this code's pool-wide locks became quite bad for
performance, creating significant lock contention in the I/O pipeline.
On the other, there are now more efficient ways to obtain detailed
statistics, and these kstats were illumos-specific and much less usable
on Linux and FreeBSD, where they were reported only via procfs/sysctls.

This commit does not remove the KSTAT_TYPE_IO implementation itself;
that may be removed later together with the already unused
KSTAT_TYPE_INTR and KSTAT_TYPE_TIMER.

Reviewed-by: Matthew Ahrens
Reviewed-by: Brian Behlendorf
Signed-off-by: Alexander Motin
Sponsored-By: iXsystems, Inc.
Closes #12212 --- include/os/freebsd/spl/sys/kstat.h | 4 --- include/os/linux/spl/sys/kstat.h | 4 --- include/sys/spa.h | 1 - include/sys/zfs_context.h | 6 ---- lib/libspl/include/sys/kstat.h | 6 ---- lib/libzpool/kernel.c | 30 ----------------- module/os/freebsd/spl/spl_kstat.c | 62 ----------------------------------- module/os/linux/spl/spl-kstat.c | 66 -------------------------------------- module/zfs/spa_stats.c | 50 ----------------------------- module/zfs/vdev_queue.c | 47 --------------------------- 10 files changed, 276 deletions(-) diff --git a/include/os/freebsd/spl/sys/kstat.h b/include/os/freebsd/spl/sys/kstat.h index f5157c7f4fe3..947dfee62393 100644 --- a/include/os/freebsd/spl/sys/kstat.h +++ b/include/os/freebsd/spl/sys/kstat.h @@ -210,10 +210,6 @@ extern kstat_t *__kstat_create(const char *ks_module, int ks_instance, extern void __kstat_install(kstat_t *ksp); extern void __kstat_delete(kstat_t *ksp); -extern void kstat_waitq_enter(kstat_io_t *); -extern void kstat_waitq_exit(kstat_io_t *); -extern void kstat_runq_enter(kstat_io_t *); -extern void kstat_runq_exit(kstat_io_t *); #define kstat_set_seq_raw_ops(k, h, d, a) \ __kstat_set_seq_raw_ops(k, h, d, a) diff --git a/include/os/linux/spl/sys/kstat.h b/include/os/linux/spl/sys/kstat.h index 905d8257c8d3..928f70757545 100644 --- a/include/os/linux/spl/sys/kstat.h +++ b/include/os/linux/spl/sys/kstat.h @@ -206,10 +206,6 @@ extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, extern void __kstat_install(kstat_t *ksp); extern void __kstat_delete(kstat_t *ksp); -extern void kstat_waitq_enter(kstat_io_t *); -extern void kstat_waitq_exit(kstat_io_t *); -extern void kstat_runq_enter(kstat_io_t *); -extern void kstat_runq_exit(kstat_io_t *); #define kstat_set_raw_ops(k, h, d, a) \ __kstat_set_raw_ops(k, h, d, a) diff --git a/include/sys/spa.h b/include/sys/spa.h index 374d36e7327e..d37c6c923d8c 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -895,7 +895,6 @@ typedef struct spa_stats { spa_history_list_t read_history; spa_history_list_t txg_history; spa_history_kstat_t tx_assign_histogram; - spa_history_kstat_t io_history; spa_history_list_t mmp_history; spa_history_kstat_t state; /* pool state */ spa_history_kstat_t iostats; diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 89afa98253f7..aa4338ed2859 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -360,12 +360,6 @@ extern kstat_t *kstat_create(const char *, int, const char *, const char *, uchar_t, ulong_t, uchar_t); extern void kstat_install(kstat_t *); extern void kstat_delete(kstat_t *); -extern void kstat_waitq_enter(kstat_io_t *); -extern void kstat_waitq_exit(kstat_io_t *); -extern void kstat_runq_enter(kstat_io_t *); -extern void kstat_runq_exit(kstat_io_t *); -extern void kstat_waitq_to_runq(kstat_io_t *); -extern void kstat_runq_back_to_waitq(kstat_io_t *); extern void kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), int (*data)(char *buf, size_t size, void *data), diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h index 69fb6d401fc7..f73fb92eb797 100644 --- a/lib/libspl/include/sys/kstat.h +++ b/lib/libspl/include/sys/kstat.h @@ -796,12 +796,6 @@ extern void kstat_delete_byname(const char *, int, const char *); extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t); extern void kstat_named_init(kstat_named_t *, const char *, uchar_t); extern void kstat_timer_init(kstat_timer_t *, const char *); -extern void 
kstat_waitq_enter(kstat_io_t *); -extern void kstat_waitq_exit(kstat_io_t *); -extern void kstat_runq_enter(kstat_io_t *); -extern void kstat_runq_exit(kstat_io_t *); -extern void kstat_waitq_to_runq(kstat_io_t *); -extern void kstat_runq_back_to_waitq(kstat_io_t *); extern void kstat_timer_start(kstat_timer_t *); extern void kstat_timer_stop(kstat_timer_t *); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index e96a1d7521d9..cc8e534e7eb5 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -146,36 +146,6 @@ void kstat_delete(kstat_t *ksp) {} -/*ARGSUSED*/ -void -kstat_waitq_enter(kstat_io_t *kiop) -{} - -/*ARGSUSED*/ -void -kstat_waitq_exit(kstat_io_t *kiop) -{} - -/*ARGSUSED*/ -void -kstat_runq_enter(kstat_io_t *kiop) -{} - -/*ARGSUSED*/ -void -kstat_runq_exit(kstat_io_t *kiop) -{} - -/*ARGSUSED*/ -void -kstat_waitq_to_runq(kstat_io_t *kiop) -{} - -/*ARGSUSED*/ -void -kstat_runq_back_to_waitq(kstat_io_t *kiop) -{} - void kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), diff --git a/module/os/freebsd/spl/spl_kstat.c b/module/os/freebsd/spl/spl_kstat.c index e591921ace1b..059ada235c4a 100644 --- a/module/os/freebsd/spl/spl_kstat.c +++ b/module/os/freebsd/spl/spl_kstat.c @@ -508,65 +508,3 @@ kstat_delete(kstat_t *ksp) kmem_free(ksp->ks_data, ksp->ks_data_size); free(ksp, M_KSTAT); } - -void -kstat_waitq_enter(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t wcnt; - - new = gethrtime(); - delta = new - kiop->wlastupdate; - kiop->wlastupdate = new; - wcnt = kiop->wcnt++; - if (wcnt != 0) { - kiop->wlentime += delta * wcnt; - kiop->wtime += delta; - } -} - -void -kstat_waitq_exit(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t wcnt; - - new = gethrtime(); - delta = new - kiop->wlastupdate; - kiop->wlastupdate = new; - wcnt = kiop->wcnt--; - ASSERT3S(wcnt, >, 0); - kiop->wlentime += delta * wcnt; - kiop->wtime += delta; -} - -void -kstat_runq_enter(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t rcnt; - - new = gethrtime(); - delta = new - kiop->rlastupdate; - kiop->rlastupdate = new; - rcnt = kiop->rcnt++; - if (rcnt != 0) { - kiop->rlentime += delta * rcnt; - kiop->rtime += delta; - } -} - -void -kstat_runq_exit(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t rcnt; - - new = gethrtime(); - delta = new - kiop->rlastupdate; - kiop->rlastupdate = new; - rcnt = kiop->rcnt--; - ASSERT3S(rcnt, >, 0); - kiop->rlentime += delta * rcnt; - kiop->rtime += delta; -} diff --git a/module/os/linux/spl/spl-kstat.c b/module/os/linux/spl/spl-kstat.c index c7f1aadf784e..0c46708326d8 100644 --- a/module/os/linux/spl/spl-kstat.c +++ b/module/os/linux/spl/spl-kstat.c @@ -50,72 +50,6 @@ kstat_resize_raw(kstat_t *ksp) return (0); } -void -kstat_waitq_enter(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t wcnt; - - new = gethrtime(); - delta = new - kiop->wlastupdate; - kiop->wlastupdate = new; - wcnt = kiop->wcnt++; - if (wcnt != 0) { - kiop->wlentime += delta * wcnt; - kiop->wtime += delta; - } -} -EXPORT_SYMBOL(kstat_waitq_enter); - -void -kstat_waitq_exit(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t wcnt; - - new = gethrtime(); - delta = new - kiop->wlastupdate; - kiop->wlastupdate = new; - wcnt = kiop->wcnt--; - ASSERT((int)wcnt > 0); - kiop->wlentime += delta * wcnt; - kiop->wtime += delta; -} -EXPORT_SYMBOL(kstat_waitq_exit); - -void -kstat_runq_enter(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t rcnt; - - new = gethrtime(); - delta = new - kiop->rlastupdate; - kiop->rlastupdate = new; - rcnt = kiop->rcnt++; - if (rcnt 
!= 0) { - kiop->rlentime += delta * rcnt; - kiop->rtime += delta; - } -} -EXPORT_SYMBOL(kstat_runq_enter); - -void -kstat_runq_exit(kstat_io_t *kiop) -{ - hrtime_t new, delta; - ulong_t rcnt; - - new = gethrtime(); - delta = new - kiop->rlastupdate; - kiop->rlastupdate = new; - rcnt = kiop->rcnt--; - ASSERT((int)rcnt > 0); - kiop->rlentime += delta * rcnt; - kiop->rtime += delta; -} -EXPORT_SYMBOL(kstat_runq_exit); - static int kstat_seq_show_headers(struct seq_file *f) { diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index c3eacc14239e..534ac72fee7b 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -548,54 +548,6 @@ spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64); } -/* - * ========================================================================== - * SPA IO History Routines - * ========================================================================== - */ -static int -spa_io_history_update(kstat_t *ksp, int rw) -{ - if (rw == KSTAT_WRITE) - memset(ksp->ks_data, 0, ksp->ks_data_size); - - return (0); -} - -static void -spa_io_history_init(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - char *name; - kstat_t *ksp; - - mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); - - name = kmem_asprintf("zfs/%s", spa_name(spa)); - - ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0); - shk->kstat = ksp; - - if (ksp) { - ksp->ks_lock = &shk->lock; - ksp->ks_private = spa; - ksp->ks_update = spa_io_history_update; - kstat_install(ksp); - } - kmem_strfree(name); -} - -static void -spa_io_history_destroy(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - - if (shk->kstat) - kstat_delete(shk->kstat); - - mutex_destroy(&shk->lock); -} - /* * ========================================================================== * SPA MMP History Routines @@ -996,7 +948,6 @@ spa_stats_init(spa_t *spa) spa_read_history_init(spa); spa_txg_history_init(spa); spa_tx_assign_init(spa); - spa_io_history_init(spa); spa_mmp_history_init(spa); spa_state_init(spa); spa_iostats_init(spa); @@ -1010,7 +961,6 @@ spa_stats_destroy(spa_t *spa) spa_tx_assign_destroy(spa); spa_txg_history_destroy(spa); spa_read_history_destroy(spa); - spa_io_history_destroy(spa); spa_mmp_history_destroy(spa); } diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 25a4bc69cc23..198861edb816 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -35,8 +35,6 @@ #include #include #include -#include -#include #include /* @@ -516,35 +514,17 @@ vdev_queue_fini(vdev_t *vd) static void vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) { - spa_t *spa = zio->io_spa; - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); avl_add(vdev_queue_type_tree(vq, zio->io_type), zio); - - if (shk->kstat != NULL) { - mutex_enter(&shk->lock); - kstat_waitq_enter(shk->kstat->ks_data); - mutex_exit(&shk->lock); - } } static void vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) { - spa_t *spa = zio->io_spa; - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio); - - if (shk->kstat != NULL) { - mutex_enter(&shk->lock); - kstat_waitq_exit(shk->kstat->ks_data); - mutex_exit(&shk->lock); - } } static 
boolean_t @@ -564,9 +544,6 @@ vdev_queue_is_interactive(zio_priority_t p) static void vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) { - spa_t *spa = zio->io_spa; - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active++; @@ -577,20 +554,11 @@ vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) vq->vq_nia_credit--; } avl_add(&vq->vq_active_tree, zio); - - if (shk->kstat != NULL) { - mutex_enter(&shk->lock); - kstat_runq_enter(shk->kstat->ks_data); - mutex_exit(&shk->lock); - } } static void vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) { - spa_t *spa = zio->io_spa; - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active--; @@ -602,21 +570,6 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) } else if (vq->vq_ia_active == 0) vq->vq_nia_credit++; avl_remove(&vq->vq_active_tree, zio); - - if (shk->kstat != NULL) { - kstat_io_t *ksio = shk->kstat->ks_data; - - mutex_enter(&shk->lock); - kstat_runq_exit(ksio); - if (zio->io_type == ZIO_TYPE_READ) { - ksio->reads++; - ksio->nread += zio->io_size; - } else if (zio->io_type == ZIO_TYPE_WRITE) { - ksio->writes++; - ksio->nwritten += zio->io_size; - } - mutex_exit(&shk->lock); - } } static void -- cgit v1.2.3 From eec5ba113e1d285d445333079a3e8184872ad00a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 10 Jun 2021 18:26:37 +0200 Subject: dracut: 90zfs: respect zfs_force=1 on systemd systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On systemd systems provide an environment generator in order to respect the zfs_force=1 kernel command line option. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Nguyen Signed-off-by: Ahelenia Ziemiańska Closes #11403 Closes #12195 --- contrib/dracut/90zfs/.gitignore | 13 ++---------- contrib/dracut/90zfs/Makefile.am | 3 ++- contrib/dracut/90zfs/import-opts-generator.sh.in | 5 +++++ contrib/dracut/90zfs/module-setup.sh.in | 27 +++++++++++++++++------- etc/systemd/system/zfs-import-cache.service.in | 2 +- etc/systemd/system/zfs-import-scan.service.in | 2 +- 6 files changed, 30 insertions(+), 22 deletions(-) create mode 100755 contrib/dracut/90zfs/import-opts-generator.sh.in diff --git a/contrib/dracut/90zfs/.gitignore b/contrib/dracut/90zfs/.gitignore index dce24393479b..cb84212f3a2a 100644 --- a/contrib/dracut/90zfs/.gitignore +++ b/contrib/dracut/90zfs/.gitignore @@ -1,11 +1,2 @@ -export-zfs.sh -module-setup.sh -mount-zfs.sh -parse-zfs.sh -zfs-generator.sh -zfs-lib.sh -zfs-load-key.sh -zfs-needshutdown.sh -zfs-env-bootfs.service -zfs-snapshot-bootfs.service -zfs-rollback-bootfs.service +*.sh +*.service diff --git a/contrib/dracut/90zfs/Makefile.am b/contrib/dracut/90zfs/Makefile.am index ff3a2b27f24e..3f7050300994 100644 --- a/contrib/dracut/90zfs/Makefile.am +++ b/contrib/dracut/90zfs/Makefile.am @@ -10,7 +10,8 @@ pkgdracut_SCRIPTS = \ zfs-generator.sh \ zfs-load-key.sh \ zfs-needshutdown.sh \ - zfs-lib.sh + zfs-lib.sh \ + import-opts-generator.sh pkgdracut_DATA = \ zfs-env-bootfs.service \ diff --git a/contrib/dracut/90zfs/import-opts-generator.sh.in b/contrib/dracut/90zfs/import-opts-generator.sh.in new file mode 100755 index 000000000000..8bc8c9b35b73 --- /dev/null +++ b/contrib/dracut/90zfs/import-opts-generator.sh.in @@ -0,0 +1,5 @@ +#!/bin/sh + +. /lib/dracut-zfs-lib.sh + +echo ZPOOL_IMPORT_OPTS="$ZPOOL_IMPORT_OPTS" diff --git a/contrib/dracut/90zfs/module-setup.sh.in b/contrib/dracut/90zfs/module-setup.sh.in index 817da5b2b4a9..a4b62da1f742 100755 --- a/contrib/dracut/90zfs/module-setup.sh.in +++ b/contrib/dracut/90zfs/module-setup.sh.in @@ -104,29 +104,40 @@ install() { if dracut_module_included "systemd"; then mkdir -p "${initdir}/$systemdsystemunitdir/zfs-import.target.wants" - for _item in scan cache ; do - dracut_install @systemdunitdir@/zfs-import-$_item.service - if ! [ -L "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"/zfs-import-$_item.service ]; then - ln -s ../zfs-import-$_item.service "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"/zfs-import-$_item.service - type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-import-$_item.service + for _service in "zfs-import-scan.service" "zfs-import-cache.service" ; do + dracut_install "@systemdunitdir@/$_service" + if ! [ -L "${initdir}/$systemdsystemunitdir/zfs-import.target.wants/$_service" ]; then + ln -sf ../$_service "${initdir}/$systemdsystemunitdir/zfs-import.target.wants/$_service" + type mark_hostonly >/dev/null 2>&1 && mark_hostonly "@systemdunitdir@/$_service" fi done + inst "${moddir}"/zfs-env-bootfs.service "${systemdsystemunitdir}"/zfs-env-bootfs.service ln -s ../zfs-env-bootfs.service "${initdir}/${systemdsystemunitdir}/zfs-import.target.wants"/zfs-env-bootfs.service type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-env-bootfs.service + dracut_install systemd-ask-password dracut_install systemd-tty-ask-password-agent + mkdir -p "${initdir}/$systemdsystemunitdir/initrd.target.wants" dracut_install @systemdunitdir@/zfs-import.target if ! 
[ -L "${initdir}/$systemdsystemunitdir/initrd.target.wants"/zfs-import.target ]; then ln -s ../zfs-import.target "${initdir}/$systemdsystemunitdir/initrd.target.wants"/zfs-import.target type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-import.target fi + for _service in zfs-snapshot-bootfs.service zfs-rollback-bootfs.service ; do - inst "${moddir}"/$_service "${systemdsystemunitdir}"/$_service - if ! [ -L "${initdir}/$systemdsystemunitdir/initrd.target.wants"/$_service ]; then - ln -s ../$_service "${initdir}/$systemdsystemunitdir/initrd.target.wants"/$_service + inst "${moddir}/$_service" "${systemdsystemunitdir}/$_service" + if ! [ -L "${initdir}/$systemdsystemunitdir/initrd.target.wants/$_service" ]; then + ln -s "../$_service" "${initdir}/$systemdsystemunitdir/initrd.target.wants/$_service" fi done + + # There isn't a pkg-config variable for this, + # and dracut doesn't automatically resolve anything this'd be next to + local systemdsystemenvironmentgeneratordir + systemdsystemenvironmentgeneratordir="$(pkg-config --variable=prefix systemd || echo "/usr")/lib/systemd/system-environment-generators" + mkdir -p "${initdir}/${systemdsystemenvironmentgeneratordir}" + inst "${moddir}"/import-opts-generator.sh "${systemdsystemenvironmentgeneratordir}"/zfs-import-opts.sh fi } diff --git a/etc/systemd/system/zfs-import-cache.service.in b/etc/systemd/system/zfs-import-cache.service.in index 0d236fe9e468..b09961a8bb84 100644 --- a/etc/systemd/system/zfs-import-cache.service.in +++ b/etc/systemd/system/zfs-import-cache.service.in @@ -14,7 +14,7 @@ ConditionPathIsDirectory=/sys/module/zfs [Service] Type=oneshot RemainAfterExit=yes -ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN +ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN ${ZPOOL_IMPORT_OPTS} [Install] WantedBy=zfs-import.target diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in index f0317e23e508..c1111c73a607 100644 --- a/etc/systemd/system/zfs-import-scan.service.in +++ b/etc/systemd/system/zfs-import-scan.service.in @@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs [Service] Type=oneshot RemainAfterExit=yes -ExecStart=@sbindir@/zpool import -aN -o cachefile=none +ExecStart=@sbindir@/zpool import -aN -o cachefile=none ${ZPOOL_IMPORT_OPTS} [Install] WantedBy=zfs-import.target -- cgit v1.2.3 From ffdf019cb317b16109ec74ae199d4253a6662f5a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 10 Jun 2021 12:42:31 -0400 Subject: Re-embed multilist_t storage This commit partially reverts changes to multilists in PR 7968 (multi-threaded spa-sync()) and adds some cache line alignments to separate read-only multilists and heavily modified refcount's to different cache lines. Reviewed-by: Matthew Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-by: iXsystems, Inc. 
Closes #12158 --- include/sys/arc_impl.h | 12 +++---- include/sys/dmu_objset.h | 4 +-- include/sys/metaslab_impl.h | 2 +- include/sys/multilist.h | 3 +- module/zfs/arc.c | 86 ++++++++++++++++++++++----------------------- module/zfs/dbuf.c | 20 +++++------ module/zfs/dmu_objset.c | 30 ++++++++-------- module/zfs/dnode.c | 2 +- module/zfs/dsl_dataset.c | 3 +- module/zfs/dsl_pool.c | 3 +- module/zfs/metaslab.c | 24 ++++++------- module/zfs/multilist.c | 14 ++++---- 12 files changed, 99 insertions(+), 104 deletions(-) diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 94123fc10e67..c01da46e01e3 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -74,20 +74,20 @@ typedef struct arc_state { /* * list of evictable buffers */ - multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; + multilist_t arcs_list[ARC_BUFC_NUMTYPES]; + /* + * supports the "dbufs" kstat + */ + arc_state_type_t arcs_state; /* * total amount of evictable data in this state */ - zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; + zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned; /* * total amount of data in this state; this includes: evictable, * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. */ zfs_refcount_t arcs_size; - /* - * supports the "dbufs" kstat - */ - arc_state_type_t arcs_state; } arc_state_t; typedef struct arc_callback arc_callback_t; diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index a8cb812714ec..e89ee64ea686 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -153,7 +153,7 @@ struct objset { /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ zil_header_t os_zil_header; - multilist_t *os_synced_dnodes; + multilist_t os_synced_dnodes; uint64_t os_flags; uint64_t os_freed_dnodes; boolean_t os_rescan_dnodes; @@ -172,7 +172,7 @@ struct objset { /* Protected by os_lock */ kmutex_t os_lock; - multilist_t *os_dirty_dnodes[TXG_SIZE]; + multilist_t os_dirty_dnodes[TXG_SIZE]; list_t os_dnodes; list_t os_downgraded_dbufs; diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 3be0c466c403..9924c3ba0eaa 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -206,7 +206,7 @@ struct metaslab_class { * List of all loaded metaslabs in the class, sorted in order of most * recent use. */ - multilist_t *mc_metaslab_txg_list; + multilist_t mc_metaslab_txg_list; metaslab_class_allocator_t mc_allocator[]; }; diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 0c7b4075d9a3..26f37c37ab38 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -71,8 +71,9 @@ struct multilist { multilist_sublist_index_func_t *ml_index_func; }; +void multilist_create(multilist_t *, size_t, size_t, + multilist_sublist_index_func_t *); void multilist_destroy(multilist_t *); -multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *); void multilist_insert(multilist_t *, void *); void multilist_remove(multilist_t *, void *); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 5526cae378fb..737904f34855 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -2327,7 +2327,7 @@ add_reference(arc_buf_hdr_t *hdr, void *tag) (state != arc_anon)) { /* We don't use the L2-only state list. 
*/ if (state != arc_l2c_only) { - multilist_remove(state->arcs_list[arc_buf_type(hdr)], + multilist_remove(&state->arcs_list[arc_buf_type(hdr)], hdr); arc_evictable_space_decrement(hdr, state); } @@ -2361,7 +2361,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) */ if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) && (state != arc_anon)) { - multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr); + multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr); ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); arc_evictable_space_increment(hdr, state); } @@ -2464,7 +2464,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (refcnt == 0) { if (old_state != arc_anon && old_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_remove(old_state->arcs_list[buftype], hdr); + multilist_remove(&old_state->arcs_list[buftype], hdr); if (GHOST_STATE(old_state)) { ASSERT0(bufcnt); @@ -2481,7 +2481,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, * beforehand. */ ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_insert(new_state->arcs_list[buftype], hdr); + multilist_insert(&new_state->arcs_list[buftype], hdr); if (GHOST_STATE(new_state)) { ASSERT0(bufcnt); @@ -2633,8 +2633,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, * L2 headers should never be on the L2 state list since they don't * have L1 headers allocated. */ - ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) && - multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA])); + ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) && + multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA])); } void @@ -4200,7 +4200,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes, arc_buf_contents_t type) { uint64_t total_evicted = 0; - multilist_t *ml = state->arcs_list[type]; + multilist_t *ml = &state->arcs_list[type]; int num_sublists; arc_buf_hdr_t **markers; @@ -4534,8 +4534,8 @@ arc_evict_meta(uint64_t meta_used) static arc_buf_contents_t arc_evict_type(arc_state_t *state) { - multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA]; - multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA]; + multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA]; + multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA]; int data_idx = multilist_get_random_index(data_ml); int meta_idx = multilist_get_random_index(meta_ml); multilist_sublist_t *data_mls; @@ -7455,44 +7455,44 @@ arc_state_init(void) arc_mfu_ghost = &ARC_mfu_ghost; arc_l2c_only = &ARC_l2c_only; - arc_mru->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mru->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mru_ghost->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), 
arc_state_multilist_index_func); - arc_mfu->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_l2c_only->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_l2c_only->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); @@ -7558,16 +7558,16 @@ arc_state_fini(void) zfs_refcount_destroy(&arc_mfu_ghost->arcs_size); zfs_refcount_destroy(&arc_l2c_only->arcs_size); - multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]); aggsum_fini(&arc_meta_used); aggsum_fini(&arc_size); @@ -8624,16 +8624,16 @@ l2arc_sublist_lock(int list_num) switch (list_num) { case 0: - ml = arc_mfu->arcs_list[ARC_BUFC_METADATA]; + ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA]; break; case 1: - ml = arc_mru->arcs_list[ARC_BUFC_METADATA]; + ml = &arc_mru->arcs_list[ARC_BUFC_METADATA]; break; case 2: - ml = arc_mfu->arcs_list[ARC_BUFC_DATA]; + ml = &arc_mfu->arcs_list[ARC_BUFC_DATA]; break; case 3: - ml = arc_mru->arcs_list[ARC_BUFC_DATA]; + ml = &arc_mru->arcs_list[ARC_BUFC_DATA]; break; default: return (NULL); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 
764383b2d039..9d741545fae3 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -203,8 +203,8 @@ static boolean_t dbuf_evict_thread_exit; * by those caches' matching enum values (from dbuf_cached_state_t). */ typedef struct dbuf_cache { - multilist_t *cache; - zfs_refcount_t size; + multilist_t cache; + zfs_refcount_t size ____cacheline_aligned; } dbuf_cache_t; dbuf_cache_t dbuf_caches[DB_CACHE_MAX]; @@ -667,9 +667,9 @@ dbuf_cache_above_lowater(void) static void dbuf_evict_one(void) { - int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache); + int idx = multilist_get_random_index(&dbuf_caches[DB_DBUF_CACHE].cache); multilist_sublist_t *mls = multilist_sublist_lock( - dbuf_caches[DB_DBUF_CACHE].cache, idx); + &dbuf_caches[DB_DBUF_CACHE].cache, idx); ASSERT(!MUTEX_HELD(&dbuf_evict_lock)); @@ -833,8 +833,8 @@ retry: dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0); for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) { - dbuf_caches[dcs].cache = - multilist_create(sizeof (dmu_buf_impl_t), + multilist_create(&dbuf_caches[dcs].cache, + sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_cache_link), dbuf_cache_multilist_index_func); zfs_refcount_create(&dbuf_caches[dcs].size); @@ -901,7 +901,7 @@ dbuf_fini(void) for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) { zfs_refcount_destroy(&dbuf_caches[dcs].size); - multilist_destroy(dbuf_caches[dcs].cache); + multilist_destroy(&dbuf_caches[dcs].cache); } if (dbuf_ksp != NULL) { @@ -2755,7 +2755,7 @@ dbuf_destroy(dmu_buf_impl_t *db) ASSERT(db->db_caching_status == DB_DBUF_CACHE || db->db_caching_status == DB_DBUF_METADATA_CACHE); - multilist_remove(dbuf_caches[db->db_caching_status].cache, db); + multilist_remove(&dbuf_caches[db->db_caching_status].cache, db); (void) zfs_refcount_remove_many( &dbuf_caches[db->db_caching_status].size, db->db.db_size, db); @@ -3465,7 +3465,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, ASSERT(db->db_caching_status == DB_DBUF_CACHE || db->db_caching_status == DB_DBUF_METADATA_CACHE); - multilist_remove(dbuf_caches[db->db_caching_status].cache, db); + multilist_remove(&dbuf_caches[db->db_caching_status].cache, db); (void) zfs_refcount_remove_many( &dbuf_caches[db->db_caching_status].size, db->db.db_size, db); @@ -3707,7 +3707,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE; db->db_caching_status = dcs; - multilist_insert(dbuf_caches[dcs].cache, db); + multilist_insert(&dbuf_caches[dcs].cache, db); size = zfs_refcount_add_many( &dbuf_caches[dcs].size, db->db.db_size, db); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index bfb4adf262d5..8c244dc4c317 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -601,7 +601,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { - os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t), + multilist_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i]), dnode_multilist_index_func); } @@ -995,9 +995,8 @@ dmu_objset_evict_done(objset_t *os) mutex_destroy(&os->os_obj_lock); mutex_destroy(&os->os_user_ptr_lock); mutex_destroy(&os->os_upgrade_lock); - for (int i = 0; i < TXG_SIZE; i++) { - multilist_destroy(os->os_dirty_dnodes[i]); - } + for (int i = 0; i < TXG_SIZE; i++) + multilist_destroy(&os->os_dirty_dnodes[i]); spa_evicting_os_deregister(os->os_spa, os); kmem_free(os, sizeof 
(objset_t)); } @@ -1520,7 +1519,7 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx) * of why this dnode hold is always needed (even when not * doing user accounting). */ - multilist_t *newlist = dn->dn_objset->os_synced_dnodes; + multilist_t *newlist = &dn->dn_objset->os_synced_dnodes; (void) dnode_add_ref(dn, newlist); multilist_insert(newlist, dn); @@ -1689,17 +1688,16 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) * dn_dirty_link[] of this txg. But it may already * exist because we call dsl_dataset_sync() twice per txg. */ - if (os->os_synced_dnodes == NULL) { - os->os_synced_dnodes = - multilist_create(sizeof (dnode_t), + if (os->os_synced_dnodes.ml_sublists == NULL) { + multilist_create(&os->os_synced_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[txgoff]), dnode_multilist_index_func); } else { - ASSERT3U(os->os_synced_dnodes->ml_offset, ==, + ASSERT3U(os->os_synced_dnodes.ml_offset, ==, offsetof(dnode_t, dn_dirty_link[txgoff])); } - ml = os->os_dirty_dnodes[txgoff]; + ml = &os->os_dirty_dnodes[txgoff]; num_sublists = multilist_get_num_sublists(ml); for (int i = 0; i < num_sublists; i++) { if (multilist_sublist_is_empty_idx(ml, i)) @@ -1738,7 +1736,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg) { - return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK])); + return (!multilist_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK])); } static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES]; @@ -1949,7 +1947,7 @@ userquota_updates_task(void *arg) userquota_cache_t cache = { { 0 } }; multilist_sublist_t *list = - multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx); + multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx); ASSERT(multilist_sublist_head(list) == NULL || dmu_objset_userused_enabled(os)); @@ -2006,7 +2004,7 @@ userquota_updates_task(void *arg) mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); - dnode_rele(dn, os->os_synced_dnodes); + dnode_rele(dn, &os->os_synced_dnodes); } do_userquota_cacheflush(os, &cache, tx); multilist_sublist_unlock(list); @@ -2032,12 +2030,12 @@ dnode_rele_task(void *arg) objset_t *os = uua->uua_os; multilist_sublist_t *list = - multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx); + multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx); dnode_t *dn; while ((dn = multilist_sublist_head(list)) != NULL) { multilist_sublist_remove(list, dn); - dnode_rele(dn, os->os_synced_dnodes); + dnode_rele(dn, &os->os_synced_dnodes); } multilist_sublist_unlock(list); kmem_free(uua, sizeof (*uua)); @@ -2093,7 +2091,7 @@ dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx) { boolean_t need_userquota = dmu_objset_do_userquota_updates_prep(os, tx); - int num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); + int num_sublists = multilist_get_num_sublists(&os->os_synced_dnodes); for (int i = 0; i < num_sublists; i++) { userquota_updates_arg_t *uua = kmem_alloc(sizeof (*uua), KM_SLEEP); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 0fc788e28fe4..8434e72aa4f8 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1671,7 +1671,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) */ dmu_objset_userquota_get_ids(dn, B_TRUE, tx); - multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK]; + multilist_t *dirtylist = &os->os_dirty_dnodes[txg & TXG_MASK]; multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn); /* diff --git a/module/zfs/dsl_dataset.c 
b/module/zfs/dsl_dataset.c index 6da5faf01edf..9b9bb42287d5 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -2267,8 +2267,7 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) dsl_bookmark_sync_done(ds, tx); - multilist_destroy(os->os_synced_dnodes); - os->os_synced_dnodes = NULL; + multilist_destroy(&os->os_synced_dnodes); if (os->os_encrypted) os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE; diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index c770eafa75d8..e66c136a9e02 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -568,8 +568,7 @@ dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx) VERIFY0(zio_wait(zio)); dmu_objset_sync_done(dp->dp_meta_objset, tx); taskq_wait(dp->dp_sync_taskq); - multilist_destroy(dp->dp_meta_objset->os_synced_dnodes); - dp->dp_meta_objset->os_synced_dnodes = NULL; + multilist_destroy(&dp->dp_meta_objset->os_synced_dnodes); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 3b2b79b2f42f..e588765b3382 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -416,7 +416,7 @@ metaslab_class_create(spa_t *spa, metaslab_ops_t *ops) mc->mc_spa = spa; mc->mc_ops = ops; mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL); - mc->mc_metaslab_txg_list = multilist_create(sizeof (metaslab_t), + multilist_create(&mc->mc_metaslab_txg_list, sizeof (metaslab_t), offsetof(metaslab_t, ms_class_txg_node), metaslab_idx_func); for (int i = 0; i < spa->spa_alloc_count; i++) { metaslab_class_allocator_t *mca = &mc->mc_allocator[i]; @@ -443,7 +443,7 @@ metaslab_class_destroy(metaslab_class_t *mc) zfs_refcount_destroy(&mca->mca_alloc_slots); } mutex_destroy(&mc->mc_lock); - multilist_destroy(mc->mc_metaslab_txg_list); + multilist_destroy(&mc->mc_metaslab_txg_list); kmem_free(mc, offsetof(metaslab_class_t, mc_allocator[spa->spa_alloc_count])); } @@ -639,7 +639,7 @@ metaslab_class_expandable_space(metaslab_class_t *mc) void metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) { - multilist_t *ml = mc->mc_metaslab_txg_list; + multilist_t *ml = &mc->mc_metaslab_txg_list; for (int i = 0; i < multilist_get_num_sublists(ml); i++) { multilist_sublist_t *mls = multilist_sublist_lock(ml, i); metaslab_t *msp = multilist_sublist_head(mls); @@ -1139,7 +1139,7 @@ metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); multilist_sublist_unlock(mls); @@ -2175,20 +2175,20 @@ metaslab_potentially_evict(metaslab_class_t *mc) uint64_t size = spl_kmem_cache_entry_size(zfs_btree_leaf_cache); int tries = 0; for (; allmem * zfs_metaslab_mem_limit / 100 < inuse * size && - tries < multilist_get_num_sublists(mc->mc_metaslab_txg_list) * 2; + tries < multilist_get_num_sublists(&mc->mc_metaslab_txg_list) * 2; tries++) { unsigned int idx = multilist_get_random_index( - mc->mc_metaslab_txg_list); + &mc->mc_metaslab_txg_list); multilist_sublist_t *mls = - multilist_sublist_lock(mc->mc_metaslab_txg_list, idx); + multilist_sublist_lock(&mc->mc_metaslab_txg_list, idx); metaslab_t *msp = multilist_sublist_head(mls); multilist_sublist_unlock(mls); while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 < inuse * size) { 
VERIFY3P(mls, ==, multilist_sublist_lock( - mc->mc_metaslab_txg_list, idx)); + &mc->mc_metaslab_txg_list, idx)); ASSERT3U(idx, ==, - metaslab_idx_func(mc->mc_metaslab_txg_list, msp)); + metaslab_idx_func(&mc->mc_metaslab_txg_list, msp)); if (!multilist_link_active(&msp->ms_class_txg_node)) { multilist_sublist_unlock(mls); @@ -2535,7 +2535,7 @@ metaslab_unload(metaslab_t *msp) if (msp->ms_group != NULL) { metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); multilist_sublist_unlock(mls); @@ -2600,7 +2600,7 @@ metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg) ASSERT(MUTEX_HELD(&msp->ms_lock)); metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); msp->ms_selected_txg = txg; @@ -5682,7 +5682,7 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size, if (spa_writeable(spa)) { /* don't dirty if we're zdb(8) */ metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (!multilist_link_active(&msp->ms_class_txg_node)) { msp->ms_selected_txg = txg; multilist_sublist_insert_head(mls, msp); diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index 36c0d33bf1f6..eeac73bd7adf 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -68,8 +68,8 @@ multilist_d2l(multilist_t *ml, void *obj) * requirement, but a general rule of thumb in order to garner the * best multi-threaded performance out of the data structure. */ -static multilist_t * -multilist_create_impl(size_t size, size_t offset, +static void +multilist_create_impl(multilist_t *ml, size_t size, size_t offset, unsigned int num, multilist_sublist_index_func_t *index_func) { ASSERT3U(size, >, 0); @@ -77,7 +77,6 @@ multilist_create_impl(size_t size, size_t offset, ASSERT3U(num, >, 0); ASSERT3P(index_func, !=, NULL); - multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP); ml->ml_offset = offset; ml->ml_num_sublists = num; ml->ml_index_func = index_func; @@ -92,7 +91,6 @@ multilist_create_impl(size_t size, size_t offset, mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); list_create(&mls->mls_list, size, offset); } - return (ml); } /* @@ -103,8 +101,8 @@ multilist_create_impl(size_t size, size_t offset, * reserve the RAM necessary to create the extra slots for additional CPUs up * front, and dynamically adding them is a complex task. 
*/ -multilist_t * -multilist_create(size_t size, size_t offset, +void +multilist_create(multilist_t *ml, size_t size, size_t offset, multilist_sublist_index_func_t *index_func) { int num_sublists; @@ -115,7 +113,7 @@ multilist_create(size_t size, size_t offset, num_sublists = MAX(boot_ncpus, 4); } - return (multilist_create_impl(size, offset, num_sublists, index_func)); + multilist_create_impl(ml, size, offset, num_sublists, index_func); } /* @@ -141,7 +139,7 @@ multilist_destroy(multilist_t *ml) ml->ml_num_sublists = 0; ml->ml_offset = 0; - kmem_free(ml, sizeof (multilist_t)); + ml->ml_sublists = NULL; } /* -- cgit v1.2.3 From 9d639d879954e694dea7d2fa5d9c4e9ff1ef0e69 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 11 Jun 2021 08:21:36 -0700 Subject: ZTS: Add zfs_clone_livelist_dedup.ksh to Makefile.am Commit 86b5f4c12 added a new zfs_clone_livelist_dedup.ksh test case but didn't include it in the Makefile.am. This results in the test not being included in the dist tarball so it's never run by the CI. Reviewed-by: John Kennedy Reviewed-by: Serapheim Dimitropoulos Signed-off-by: Brian Behlendorf Closes: #12224 --- tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am index c012b35d05b4..664f3d81aea6 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am @@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ zfs_clone_livelist_condense_and_disable.ksh \ zfs_clone_livelist_condense_races.ksh \ + zfs_clone_livelist_dedup.ksh \ zfs_destroy_001_pos.ksh \ zfs_destroy_002_pos.ksh \ zfs_destroy_003_pos.ksh \ -- cgit v1.2.3 From 1a345d645a5389ac4f9922bc7ecf5cc519e15d32 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Fri, 11 Jun 2021 11:38:23 -0400 Subject: Added uncompress requirement Having an old enough version of "file" and no "uncompress" program installed can cause rpmbuild as root to crash and mangle rpmdb. So let's add a build dependency for RPM-based systems. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Rich Ercolani Closes: #12071 Closes: #12168 --- rpm/generic/zfs-dkms.spec.in | 7 +++++++ rpm/generic/zfs-kmod.spec.in | 7 +++++++ rpm/generic/zfs.spec.in | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in index 0a6935516c6e..e0c410c680c2 100644 --- a/rpm/generic/zfs-dkms.spec.in +++ b/rpm/generic/zfs-dkms.spec.in @@ -32,6 +32,13 @@ Obsoletes: spl-dkms Provides: %{module}-kmod = %{version} AutoReqProv: no +%if 0%{?rhel}%{?fedora}%{?suse_version} +# We don't directly use it, but if this isn't installed, rpmbuild as root can +# crash+corrupt rpmdb +# See issue #12071 +BuildRequires: ncompress +%endif + %description This package contains the dkms ZFS kernel modules. 
diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in index 6e4bfdcfedc0..1692be1a72e6 100644 --- a/rpm/generic/zfs-kmod.spec.in +++ b/rpm/generic/zfs-kmod.spec.in @@ -57,6 +57,13 @@ BuildRequires: gcc, make BuildRequires: elfutils-libelf-devel %endif +%if 0%{?rhel}%{?fedora}%{?suse_version} +# We don't directly use it, but if this isn't installed, rpmbuild as root can +# crash+corrupt rpmdb +# See issue #12071 +BuildRequires: ncompress +%endif + # The developments headers will conflict with the dkms packages. Conflicts: %{module}-dkms diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 6ce64db4f17b..4a37ae8ce1d5 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -140,9 +140,14 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel +# We don't directly use it, but if this isn't installed, rpmbuild as root can +# crash+corrupt rpmdb +# See issue #12071 +BuildRequires: ncompress %if 0%{?fedora} >= 28 || 0%{?rhel} >= 8 || 0%{?centos} >= 8 BuildRequires: libtirpc-devel %endif + Requires: openssl %if 0%{?_systemd} BuildRequires: systemd -- cgit v1.2.3 From 3aa81a6635bf025298a40bf7b62ab57c556cfbbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 22 May 2021 16:23:59 +0200 Subject: linux/libzutil: use ARRAY_SIZE instead of constant for search paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Ahelenia Ziemiańska Closes #12105 --- lib/libzutil/os/freebsd/zutil_import_os.c | 2 +- lib/libzutil/os/linux/zutil_import_os.c | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/libzutil/os/freebsd/zutil_import_os.c b/lib/libzutil/os/freebsd/zutil_import_os.c index 36c4d90aa4b9..2d8900ce2483 100644 --- a/lib/libzutil/os/freebsd/zutil_import_os.c +++ b/lib/libzutil/os/freebsd/zutil_import_os.c @@ -168,7 +168,7 @@ out: (void) close(fd); } -static const char * +static const char * const zpool_default_import_path[] = { "/dev" }; diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index 84c3cb44fec7..0a647375b51e 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -74,7 +74,6 @@ #endif #include -#define DEFAULT_IMPORT_PATH_SIZE 9 #define DEV_BYID_PATH "/dev/disk/by-id/" static boolean_t @@ -255,8 +254,8 @@ zpool_open_func(void *arg) } } -static char * -zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { +static const char * const +zpool_default_import_path[] = { "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ "/dev/mapper", /* Use multipath devices before components */ "/dev/disk/by-partlabel", /* Single unique entry set by user */ @@ -271,8 +270,8 @@ zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { const char * const * zpool_default_search_paths(size_t *count) { - *count = DEFAULT_IMPORT_PATH_SIZE; - return ((const char * const *)zpool_default_import_path); + *count = ARRAY_SIZE(zpool_default_import_path); + return (zpool_default_import_path); } /* @@ -300,7 +299,7 @@ zfs_path_order(char *name, int *order) } free(envdup); } else { - for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) { + for (i = 0; i < ARRAY_SIZE(zpool_default_import_path); i++) { if (strncmp(name, zpool_default_import_path[i], strlen(zpool_default_import_path[i])) == 0) { *order = i; -- cgit v1.2.3 From 
0854d4c186f52066ba0bd2da4282c5fde4bd9326 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?=
Date: Thu, 3 Jun 2021 17:17:09 +0200
Subject: libzutil: add zfs_{base,dir}name()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: John Kennedy
Reviewed-by: Brian Behlendorf
Reviewed-by: Ryan Moeller
Signed-off-by: Ahelenia Ziemiańska
Closes #12105
---
 include/libzutil.h               |    3 +
 lib/libzfs_core/libzfs_core.abi  | 1354 +++++++++++++++++++-------------------
 lib/libzutil/zutil_device_path.c |   16 +
 3 files changed, 701 insertions(+), 672 deletions(-)

diff --git a/include/libzutil.h b/include/libzutil.h
index 5b0927961800..ef17bd5426df 100644
--- a/include/libzutil.h
+++ b/include/libzutil.h
@@ -160,6 +160,9 @@ _LIBZUTIL_H void color_start(char *color);
 _LIBZUTIL_H void color_end(void);
 _LIBZUTIL_H int printf_color(char *color, char *format, ...);
 
+_LIBZUTIL_H const char *zfs_basename(const char *path);
+_LIBZUTIL_H ssize_t zfs_dirnamelen(const char *path);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi
index f397929e1afe..c54a994f79bc 100644
--- a/lib/libzfs_core/libzfs_core.abi
+++ b/lib/libzfs_core/libzfs_core.abi
[The body of this hunk series, 1354 changed lines of machine-generated
libabigail XML recording the new zfs_basename()/zfs_dirnamelen() symbols,
was mangled during extraction (the XML tags were stripped, leaving only
bare +/- markers) and is omitted here.]
diff --git a/lib/libzutil/zutil_device_path.c b/lib/libzutil/zutil_device_path.c
index bcdc72baa682..435c444b2460 100644
--- a/lib/libzutil/zutil_device_path.c
+++ b/lib/libzutil/zutil_device_path.c
@@ -31,6 +31,22 @@
 
 #include
 
+/* Substring from after the last slash, or the string itself if none */
+const char *
+zfs_basename(const char *path)
+{
+	const char *bn = strrchr(path, '/');
+	return (bn ? bn + 1 : path);
+}
+
+/* Return index of last slash or -1 if none */
+ssize_t
+zfs_dirnamelen(const char *path)
+{
+	const char *end = strrchr(path, '/');
+	return (end ? end - path : -1);
+}
+
 /*
  * Given a shorthand device name check if a file by that name exists in any
  * of the 'zpool_default_import_path' or ZPOOL_IMPORT_PATH directories. If
-- cgit v1.2.3

From bf80fb53f59c1d98c03758dec6322cfacec4ccf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?=
Date: Sat, 22 May 2021 16:26:40 +0200
Subject: linux/libzutil: zpool_open_func: don't dup name, extract untouchables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: John Kennedy
Reviewed-by: Brian Behlendorf
Reviewed-by: Ryan Moeller
Signed-off-by: Ahelenia Ziemiańska
Closes #12105
---
 lib/libzutil/os/linux/zutil_import_os.c | 35 +++++++++++----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c
index 0a647375b51e..433a72282bb7 100644
--- a/lib/libzutil/os/linux/zutil_import_os.c
+++ b/lib/libzutil/os/linux/zutil_import_os.c
@@ -76,18 +76,19 @@
 
 #define DEV_BYID_PATH "/dev/disk/by-id/"
 
+/*
+ * Skip devices with well known prefixes:
+ * there can be side effects when opening devices which need to be avoided.
+ *
+ * hpet        - High Precision Event Timer
+ * watchdog[N] - Watchdog must be closed in a special way.
+ */ static boolean_t -is_watchdog_dev(char *dev) +should_skip_dev(const char *dev) { - /* For 'watchdog' dev */ - if (strcmp(dev, "watchdog") == 0) - return (B_TRUE); - - /* For 'watchdog */ - if (strstr(dev, "watchdog") == dev && isdigit(dev[8])) - return (B_TRUE); - - return (B_FALSE); + return ((strcmp(dev, "watchdog") == 0) || + (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) || + (strcmp(dev, "hpet") == 0)); } int @@ -103,24 +104,12 @@ zpool_open_func(void *arg) libpc_handle_t *hdl = rn->rn_hdl; struct stat64 statbuf; nvlist_t *config; - char *bname, *dupname; uint64_t vdev_guid = 0; int error; int num_labels = 0; int fd; - /* - * Skip devices with well known prefixes there can be side effects - * when opening devices which need to be avoided. - * - * hpet - High Precision Event Timer - * watchdog - Watchdog must be closed in a special way. - */ - dupname = zutil_strdup(hdl, rn->rn_name); - bname = basename(dupname); - error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname)); - free(dupname); - if (error) + if (should_skip_dev(zfs_basename(rn->rn_name))) return; /* -- cgit v1.2.3 From 64dfdaba372f07f91a6eab598b3480693b1d14c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 22 May 2021 16:29:53 +0200 Subject: libzutil: import: filter out unsuitable files earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only accept the right type of file, if available, and reject too-small files before opening them on Linux Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Ahelenia Ziemiańska Closes #12105 --- lib/libzutil/os/linux/zutil_import_os.c | 12 +++--------- lib/libzutil/zutil_import.c | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index 433a72282bb7..5defb526f210 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -114,9 +114,11 @@ zpool_open_func(void *arg) /* * Ignore failed stats. We only want regular files and block devices. + * Ignore files that are too small to hold a zpool. */ if (stat64(rn->rn_name, &statbuf) != 0 || - (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) + (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) || + (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE)) return; /* @@ -132,14 +134,6 @@ zpool_open_func(void *arg) if (fd < 0) return; - /* - * This file is too small to hold a zpool - */ - if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) { - (void) close(fd); - return; - } - error = zpool_read_label(fd, &config, &num_labels); if (error != 0) { (void) close(fd); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 9d7fcb8d9685..871a75ab2326 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1243,10 +1243,21 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock, while ((dp = readdir64(dirp)) != NULL) { const char *name = dp->d_name; - if (name[0] == '.' && - (name[1] == 0 || (name[1] == '.' 
&& name[2] == 0))) + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + switch (dp->d_type) { + case DT_UNKNOWN: + case DT_BLK: +#ifdef __FreeBSD__ + case DT_CHR: +#endif + case DT_REG: + break; + default: + continue; + } + zpool_find_import_scan_add_slice(hdl, lock, cache, path, name, order); } -- cgit v1.2.3 From feb04e66802ef96aa77951c43d4b632b376041ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 22 May 2021 17:19:14 +0200 Subject: Forbid basename(3) and dirname(3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are at least two interpretations of basename(3), in addition to both functions being allowed to /both/ return a static buffer (unsuitable in multi-threaded environments) /and/ raze the input (which encourages overallocations, at best) Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Ahelenia Ziemiańska Closes #12105 --- cmd/zed/agents/zfs_retire.c | 3 +- config/Rules.am | 3 ++ lib/libzfs/libzfs_pool.c | 2 +- lib/libzpool/kernel.c | 18 +++-------- lib/libzutil/zutil_import.c | 37 ++++++++++++++++------ tests/zfs-tests/tests/functional/ctime/Makefile.am | 2 ++ tests/zfs-tests/tests/functional/ctime/ctime.c | 13 +++----- 7 files changed, 45 insertions(+), 33 deletions(-) diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index 1c4cc885b5e5..1563f5d2792c 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -240,7 +241,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) ZPOOL_CONFIG_CHILDREN, &spares[s], 1); fmd_hdl_debug(hdl, "zpool_vdev_replace '%s' with spare '%s'", - dev_name, basename(spare_name)); + dev_name, zfs_basename(spare_name)); if (zpool_vdev_attach(zhp, dev_name, spare_name, replacement, B_TRUE, rebuild) == 0) { diff --git a/config/Rules.am b/config/Rules.am index ef10d493896d..8fe2fa9ca8d9 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -54,6 +54,9 @@ if BUILD_FREEBSD AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-freebsd-user\" endif AM_CPPFLAGS += -D"strtok(...)=strtok(__VA_ARGS__) __attribute__((deprecated(\"Use strtok_r(3) instead!\")))" +AM_CPPFLAGS += -D"__xpg_basename(...)=__xpg_basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" +AM_CPPFLAGS += -D"basename(...)=basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" +AM_CPPFLAGS += -D"dirname(...)=dirname(__VA_ARGS__) __attribute__((deprecated(\"dirname(3) is underspecified. 
Use zfs_dirnamelen() instead!\")))" AM_LDFLAGS = $(DEBUG_LDFLAGS) AM_LDFLAGS += $(ASAN_LDFLAGS) diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index adc36c47f290..c0bf9d067d42 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -4309,7 +4309,7 @@ zfs_save_arguments(int argc, char **argv, char *string, int len) { int i; - (void) strlcpy(string, basename(argv[0]), len); + (void) strlcpy(string, zfs_basename(argv[0]), len); for (i = 1; i < argc; i++) { (void) strlcat(string, " ", len); (void) strlcat(string, argv[i], len); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index cc8e534e7eb5..836eb176e13d 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -541,19 +542,10 @@ void __dprintf(boolean_t dprint, const char *file, const char *func, int line, const char *fmt, ...) { - const char *newfile; - va_list adx; - - /* - * Get rid of annoying "../common/" prefix to filename. - */ - newfile = strrchr(file, '/'); - if (newfile != NULL) { - newfile = newfile + 1; /* Get rid of leading / */ - } else { - newfile = file; - } + /* Get rid of annoying "../common/" prefix to filename. */ + const char *newfile = zfs_basename(file); + va_list adx; if (dprint) { /* dprintf messages are printed immediately */ @@ -1040,7 +1032,7 @@ zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) if (vn_dumpdir != NULL) { char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); - char *inpath = basename((char *)(uintptr_t)path); + const char *inpath = zfs_basename(path); (void) snprintf(dumppath, MAXPATHLEN, "%s/%s", vn_dumpdir, inpath); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 871a75ab2326..9bd12973fc51 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -154,6 +154,17 @@ zutil_strdup(libpc_handle_t *hdl, const char *str) return (ret); } +static char * +zutil_strndup(libpc_handle_t *hdl, const char *str, size_t n) +{ + char *ret; + + if ((ret = strndup(str, n)) == NULL) + (void) zutil_no_memory(hdl); + + return (ret); +} + /* * Intermediate structures used to gather configuration information. */ @@ -1272,20 +1283,22 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, { int error = 0; char path[MAXPATHLEN]; - char *d, *b; - char *dpath, *name; + char *d = NULL; + ssize_t dl; + const char *dpath, *name; /* - * Separate the directory part and last part of the - * path. We do this so that we can get the realpath of + * Separate the directory and the basename. + * We do this so that we can get the realpath of * the directory. We don't get the realpath on the * whole path because if it's a symlink, we want the * path of the symlink not where it points to. 
*/ - d = zutil_strdup(hdl, dir); - b = zutil_strdup(hdl, dir); - dpath = dirname(d); - name = basename(b); + name = zfs_basename(dir); + if ((dl = zfs_dirnamelen(dir)) == -1) + dpath = "."; + else + dpath = d = zutil_strndup(hdl, dir, dl); if (realpath(dpath, path) == NULL) { error = errno; @@ -1303,7 +1316,6 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, zpool_find_import_scan_add_slice(hdl, lock, cache, path, name, order); out: - free(b); free(d); return (error); } @@ -1506,6 +1518,7 @@ discover_cached_paths(libpc_handle_t *hdl, nvlist_t *nv, avl_tree_t *cache, pthread_mutex_t *lock) { char *path = NULL; + ssize_t dl; uint_t children; nvlist_t **child; @@ -1521,8 +1534,12 @@ discover_cached_paths(libpc_handle_t *hdl, nvlist_t *nv, * our directory cache. */ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { + if ((dl = zfs_dirnamelen(path)) == -1) + path = "."; + else + path[dl] = '\0'; return (zpool_find_import_scan_dir(hdl, lock, cache, - dirname(path), 0)); + path, 0)); } return (0); } diff --git a/tests/zfs-tests/tests/functional/ctime/Makefile.am b/tests/zfs-tests/tests/functional/ctime/Makefile.am index e7479ae81056..3174f78c6249 100644 --- a/tests/zfs-tests/tests/functional/ctime/Makefile.am +++ b/tests/zfs-tests/tests/functional/ctime/Makefile.am @@ -11,3 +11,5 @@ pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/ctime pkgexec_PROGRAMS = ctime ctime_SOURCES = ctime.c + +ctime_LDADD = $(abs_top_builddir)/lib/libzfs_core/libzfs_core.la diff --git a/tests/zfs-tests/tests/functional/ctime/ctime.c b/tests/zfs-tests/tests/functional/ctime/ctime.c index d01fa0d4ed3e..2d515d957a90 100644 --- a/tests/zfs-tests/tests/functional/ctime/ctime.c +++ b/tests/zfs-tests/tests/functional/ctime/ctime.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -149,22 +150,18 @@ static int do_link(const char *pfile) { int ret = 0; - char link_file[BUFSIZ] = { 0 }; - char pfile_copy[BUFSIZ] = { 0 }; - char *dname; + char link_file[BUFSIZ + 16] = { 0 }; if (pfile == NULL) { return (-1); } - strncpy(pfile_copy, pfile, sizeof (pfile_copy)-1); - pfile_copy[sizeof (pfile_copy) - 1] = '\0'; /* * Figure out source file directory name, and create * the link file in the same directory. */ - dname = dirname((char *)pfile_copy); - (void) snprintf(link_file, BUFSIZ, "%s/%s", dname, "link_file"); + (void) snprintf(link_file, sizeof (link_file), + "%.*s/%s", (int)zfs_dirnamelen(pfile), pfile, "link_file"); if (link(pfile, link_file) == -1) { (void) fprintf(stderr, "link(%s, %s) failed with errno %d\n", @@ -321,7 +318,7 @@ main(int argc, char *argv[]) (void) snprintf(tfile, sizeof (tfile), "%s/%s", penv[0], penv[1]); /* - * If the test file is exists, remove it first. + * If the test file exists, remove it first. 
*/ if (access(tfile, F_OK) == 0) { (void) unlink(tfile); -- cgit v1.2.3 From 10bcc4da6ceced9746fe498f12d0603951258367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 3 Jun 2021 19:40:09 +0200 Subject: scripts/commitcheck.sh: fix false positive for signed commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Ahelenia Ziemiańska Closes #12105 --- scripts/commitcheck.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/commitcheck.sh b/scripts/commitcheck.sh index 0077eb6b0406..cb9fd66c6f46 100755 --- a/scripts/commitcheck.sh +++ b/scripts/commitcheck.sh @@ -7,7 +7,7 @@ REF="HEAD" test_commit_bodylength() { length="72" - body=$(git log -n 1 --pretty=%b "$REF" | grep -Ev "http(s)*://" | grep -E -m 1 ".{$((length + 1))}") + body=$(git log --no-show-signature -n 1 --pretty=%b "$REF" | grep -Ev "http(s)*://" | grep -E -m 1 ".{$((length + 1))}") if [ -n "$body" ]; then echo "error: commit message body contains line over ${length} characters" return 1 @@ -20,7 +20,7 @@ test_commit_bodylength() check_tagged_line() { regex='^[[:space:]]*'"$1"':[[:space:]][[:print:]]+[[:space:]]<[[:graph:]]+>$' - foundline=$(git log -n 1 "$REF" | grep -E -m 1 "$regex") + foundline=$(git log --no-show-signature -n 1 "$REF" | grep -E -m 1 "$regex") if [ -z "$foundline" ]; then echo "error: missing \"$1\"" return 1 @@ -35,7 +35,7 @@ new_change_commit() error=0 # subject is not longer than 72 characters - long_subject=$(git log -n 1 --pretty=%s "$REF" | grep -E -m 1 '.{73}') + long_subject=$(git log --no-show-signature -n 1 --pretty=%s "$REF" | grep -E -m 1 '.{73}') if [ -n "$long_subject" ]; then echo "error: commit subject over 72 characters" error=1 @@ -57,7 +57,7 @@ new_change_commit() is_coverity_fix() { # subject starts with Fix coverity defects means it's a coverity fix - subject=$(git log -n 1 --pretty=%s "$REF" | grep -E -m 1 '^Fix coverity defects') + subject=$(git log --no-show-signature -n 1 --pretty=%s "$REF" | grep -E -m 1 '^Fix coverity defects') if [ -n "$subject" ]; then return 0 fi @@ -70,7 +70,7 @@ coverity_fix_commit() error=0 # subject starts with Fix coverity defects: CID dddd, dddd... - subject=$(git log -n 1 --pretty=%s "$REF" | + subject=$(git log --no-show-signature -n 1 --pretty=%s "$REF" | grep -E -m 1 'Fix coverity defects: CID [[:digit:]]+(, [[:digit:]]+)*') if [ -z "$subject" ]; then echo "error: Coverity defect fixes must have a subject line that starts with \"Fix coverity defects: CID dddd\"" @@ -86,7 +86,7 @@ coverity_fix_commit() OLDIFS=$IFS IFS=' ' - for line in $(git log -n 1 --pretty=%b "$REF" | grep -E '^CID'); do + for line in $(git log --no-show-signature -n 1 --pretty=%b "$REF" | grep -E '^CID'); do if ! echo "$line" | grep -qE '^CID [[:digit:]]+: ([[:graph:]]+|[[:space:]])+ \(([[:upper:]]|\_)+\)'; then echo "error: commit message has an improperly formatted CID defect line" error=1 -- cgit v1.2.3 From afa7b3484556d3ae610a34582ce5ebd2c3e27bba Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Fri, 11 Jun 2021 20:00:33 -0400 Subject: Do not hash unlinked inodes In zfs_znode_alloc we always hash inodes. If the znode is unlinked, we do not need to hash it. 
This fixes the problem where zfs_suspend_fs is doing zrele (iput) in an async fashion, and zfs_resume_fs unlinked drain processing will try to hash an inode that could still be hashed, resulting in a panic. Reviewed-by: Brian Behlendorf Reviewed-by: Alan Somers Signed-off-by: Paul Zuchowski Closes #9741 Closes #11223 Closes #11648 Closes #12210 --- module/os/linux/zfs/zfs_znode.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index 6015aea62dca..577927747aef 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -606,17 +606,24 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, * number is already hashed for this super block. This can never * happen because the inode numbers map 1:1 with the object numbers. * - * The one exception is rolling back a mounted file system, but in - * this case all the active inode are unhashed during the rollback. + * Exceptions include rolling back a mounted file system, either + * from the zfs rollback or zfs recv command. + * + * Active inodes are unhashed during the rollback, but since zrele + * can happen asynchronously, we can't guarantee they've been + * unhashed. This can cause hash collisions in unlinked drain + * processing so do not hash unlinked znodes. */ - VERIFY3S(insert_inode_locked(ip), ==, 0); + if (links > 0) + VERIFY3S(insert_inode_locked(ip), ==, 0); mutex_enter(&zfsvfs->z_znodes_lock); list_insert_tail(&zfsvfs->z_all_znodes, zp); zfsvfs->z_nr_znodes++; mutex_exit(&zfsvfs->z_znodes_lock); - unlock_new_inode(ip); + if (links > 0) + unlock_new_inode(ip); return (zp); error: -- cgit v1.2.3
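
Editor's note: the multilist hunks at the start of this section change multilist_t from a separately kmem_alloc()ed object into storage embedded directly in its parent (metaslab_class_t, and the objsets touched in dsl_dataset.c and dsl_pool.c). The self-contained sketch below illustrates that refactoring pattern with stand-in types; the toylist_* names are hypothetical and only mirror the create/destroy contract visible in the multilist.c hunk above, not the real ZFS API.

/* Build: cc -o embed-demo embed-demo.c */
#include <stddef.h>
#include <stdlib.h>

/* Stand-in for multilist_t; the real one lives in the ZFS headers. */
typedef struct toylist {
	size_t	tl_num_sublists;
	void	**tl_sublists;
} toylist_t;

/*
 * Old shape: the parent held a pointer, so create/destroy had to
 * allocate/free the object itself and callers had to NULL the field
 * afterwards (as the removed "os->os_synced_dnodes = NULL" lines did).
 */
struct parent_old {
	toylist_t	*po_list;
};

/*
 * New shape (as in this series): the storage is embedded, so create and
 * destroy only initialize and tear down caller-provided memory.
 */
struct parent_new {
	toylist_t	pn_list;
};

static void
toylist_create(toylist_t *tl, size_t num)
{
	/* Initialize caller-provided storage; nothing to return. */
	tl->tl_num_sublists = num;
	tl->tl_sublists = calloc(num, sizeof (void *));
}

static void
toylist_destroy(toylist_t *tl)
{
	/* Tear down internals only; mirrors "ml->ml_sublists = NULL" above. */
	free(tl->tl_sublists);
	tl->tl_sublists = NULL;
	tl->tl_num_sublists = 0;
}

int
main(void)
{
	struct parent_new pn;

	toylist_create(&pn.pn_list, 4);	/* no separate allocation to track */
	toylist_destroy(&pn.pn_list);	/* no free(), no dangling pointer */
	return (0);
}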
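
Editor's note: as a concrete illustration of the basename(3) hazards named in the "Forbid basename(3) and dirname(3)" commit message, here is a minimal sketch. It is not part of the series; demo_basename() simply repeats the zfs_basename() body from the zutil_device_path.c hunk, and the device paths are made-up examples.

/* Build: cc -o basename-demo basename-demo.c */
#include <libgen.h>	/* declares POSIX basename(3) */
#include <stdio.h>
#include <string.h>

/* Same contract as zfs_basename(): input stays const, no static buffer. */
static const char *
demo_basename(const char *path)
{
	const char *bn = strrchr(path, '/');
	return (bn ? bn + 1 : path);
}

int
main(void)
{
	/*
	 * POSIX permits basename(3) to modify this buffer in place and/or
	 * to return static storage, which is why the old zpool_open_func()
	 * had to strdup() first and why the result must not be shared
	 * between threads.
	 */
	char pathbuf[] = "/dev/disk/by-id/wwn-0x5000c500a1b2c3d4";
	printf("basename(3):    %s\n", basename(pathbuf));

	/* The replacement is reentrant and needs no copy. */
	printf("zfs_basename(): %s\n",
	    demo_basename("/dev/disk/by-vdev/slot3"));
	return (0);
}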