diff options
Diffstat (limited to 'sys/contrib/openzfs')
275 files changed, 5491 insertions, 1421 deletions
diff --git a/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/feature_request.md b/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/feature_request.md index 9b50a4a3d96e..f3d4316f6f67 100644 --- a/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/feature_request.md +++ b/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/feature_request.md @@ -14,7 +14,7 @@ Please check our issue tracker before opening a new feature request. Filling out the following template will help other contributors better understand your proposed feature. --> -### Describe the feature would like to see added to OpenZFS +### Describe the feature you would like to see added to OpenZFS <!-- Provide a clear and concise description of the feature. diff --git a/sys/contrib/openzfs/.github/PULL_REQUEST_TEMPLATE.md b/sys/contrib/openzfs/.github/PULL_REQUEST_TEMPLATE.md index 79809179cf13..47edc8174603 100644 --- a/sys/contrib/openzfs/.github/PULL_REQUEST_TEMPLATE.md +++ b/sys/contrib/openzfs/.github/PULL_REQUEST_TEMPLATE.md @@ -2,11 +2,6 @@ <!--- Provide a general summary of your changes in the Title above --> -<!--- -Documentation on ZFS Buildbot options can be found at -https://openzfs.github.io/openzfs-docs/Developer%20Resources/Buildbot%20Options.html ---> - ### Motivation and Context <!--- Why is this change required? What problem does it solve? --> <!--- If it fixes an open issue, please link to the issue here. --> diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py index b49255e8381d..08021aabcb61 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py +++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py @@ -65,7 +65,7 @@ if __name__ == '__main__': # check last (HEAD) commit message last_commit_message_raw = subprocess.run([ - 'git', 'show', '-s', '--format=%B', 'HEAD' + 'git', 'show', '-s', '--format=%B', head ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) for line in last_commit_message_raw.stdout.decode().splitlines(): diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh index de29ad1f57b6..0278264d9279 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh @@ -6,6 +6,13 @@ set -eu +# We've been seeing this script take over 15min to run. This may or +# may not be normal. Just to get a little more insight, print out +# a message to stdout with the top running process, and do this every +# 30 seconds. We can delete this watchdog later once we get a better +# handle on what the timeout value should be. +(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)}')]"; done) & + # install needed packages export DEBIAN_FRONTEND="noninteractive" sudo apt-get -y update @@ -65,3 +72,6 @@ sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \ for i in /sys/block/s*/queue/scheduler; do echo "none" | sudo tee $i done + +# Kill off our watchdog +kill $(jobs -p) diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 70a2364f1fc6..1c608348ffcd 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -25,6 +25,10 @@ UBMIRROR="https://cloud-images.ubuntu.com" # default nic model for vm's NIC="virtio" +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in almalinux8) OSNAME="AlmaLinux 8" @@ -61,6 +65,14 @@ case "$OS" in OSNAME="Debian 12" URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" ;; + debian13) + OSNAME="Debian 13" + # TODO: Overwrite OSv to debian13 for virt-install until it's added to osinfo + OSv="debian12" + URL="https://cloud.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2" + OPTS[0]="--boot" + OPTS[1]="uefi=on" + ;; fedora41) OSNAME="Fedora 41" OSv="fedora-unknown" @@ -109,7 +121,7 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-PRERELEASE" + FreeBSD="15.0-ALPHA3" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" @@ -242,7 +254,7 @@ sudo virt-install \ --network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \ --cloud-init user-data=/tmp/user-data \ --disk $DISK,bus=virtio,cache=none,format=raw,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} >/dev/null # Give the VMs hostnames so we don't have to refer to them with # hardcoded IP addresses. diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh index c41ecd09d52e..f67bb2f68e94 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -20,7 +20,7 @@ function archlinux() { sudo pacman -Sy --noconfirm base-devel bc cpio cryptsetup dhclient dkms \ fakeroot fio gdb inetutils jq less linux linux-headers lsscsi nfs-utils \ parted pax perf python-packaging python-setuptools qemu-guest-agent ksh \ - samba sysstat rng-tools rsync wget xxhash + samba strace sysstat rng-tools rsync wget xxhash echo "##[endgroup]" } @@ -41,9 +41,10 @@ function debian() { libelf-dev libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev \ libtool libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ - python3-cffi python3-dev python3-distlib python3-packaging \ + python3-cffi python3-dev python3-distlib python3-packaging libtirpc-dev \ python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ - rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev + rsync samba strace sysstat uuid-dev watchdog wget xfslibs-dev xxhash \ + zlib1g-dev echo "##[endgroup]" } @@ -87,8 +88,8 @@ function rhel() { libuuid-devel lsscsi mdadm nfs-utils openssl-devel pam-devel pamtester \ parted perf python3 python3-cffi python3-devel python3-packaging \ kernel-devel python3-setuptools qemu-guest-agent rng-tools rpcgen \ - rpm-build rsync samba sysstat systemd watchdog wget xfsprogs-devel xxhash \ - zlib-devel + rpm-build rsync samba strace sysstat systemd watchdog wget xfsprogs-devel \ + xxhash zlib-devel echo "##[endgroup]" } @@ -104,7 +105,7 @@ function install_fedora_experimental_kernel { our_version="$1" sudo dnf -y copr enable @kernel-vanilla/stable sudo dnf -y copr enable @kernel-vanilla/mainline - all="$(sudo dnf list --showduplicates kernel-*)" + all="$(sudo dnf list --showduplicates kernel-* python3-perf* perf* bpftool*)" echo "Available versions:" echo "$all" diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh index 6bf10024a1a6..4869c1003e48 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh @@ -12,16 +12,26 @@ source /var/tmp/env.txt # wait for poweroff to succeed PID=$(pidof /usr/bin/qemu-system-x86_64) tail --pid=$PID -f /dev/null -sudo virsh undefine openzfs +sudo virsh undefine --nvram openzfs # cpu pinning CPUSET=("0,1" "2,3") +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in freebsd*) # FreeBSD needs only 6GiB RAM=6 ;; + debian13) + RAM=8 + # Boot Debian 13 with uefi=on and secureboot=off (ZFS Kernel Module not signed) + OPTS[0]="--boot" + OPTS[1]="firmware=efi,firmware.feature0.name=secure-boot,firmware.feature0.enabled=no" + ;; *) # Linux needs more memory, but can be optimized to share it via KSM RAM=8 @@ -79,7 +89,7 @@ EOF --network bridge=virbr0,model=$NIC,mac="52:54:00:83:79:0$i" \ --disk $DISK-system,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ --disk $DISK-tests,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} done # generate some memory stats @@ -98,19 +108,30 @@ echo '*/5 * * * * /root/cronjob.sh' > crontab.txt sudo crontab crontab.txt rm crontab.txt -# check if the machines are okay -echo "Waiting for vm's to come up... (${VMs}x CPU=$CPU RAM=$RAM)" -for ((i=1; i<=VMs; i++)); do - .github/workflows/scripts/qemu-wait-for-vm.sh vm$i -done -echo "All $VMs VMs are up now." - # Save the VM's serial output (ttyS0) to /var/tmp/console.txt # - ttyS0 on the VM corresponds to a local /dev/pty/N entry # - use 'virsh ttyconsole' to lookup the /dev/pty/N entry for ((i=1; i<=VMs; i++)); do mkdir -p $RESPATH/vm$i read "pty" <<< $(sudo virsh ttyconsole vm$i) + + # Create the file so we can tail it, even if there's no output. + touch $RESPATH/vm$i/console.txt + sudo nohup bash -c "cat $pty > $RESPATH/vm$i/console.txt" & + + # Write all VM boot lines to the console to aid in debugging failed boots. + # The boot lines from all the VMs will be munged together, so prepend each + # line with the vm hostname (like 'vm1:'). + (while IFS=$'\n' read -r line; do echo "vm$i: $line" ; done < <(sudo tail -f $RESPATH/vm$i/console.txt)) & + done echo "Console logging for ${VMs}x $OS started." + + +# check if the machines are okay +echo "Waiting for vm's to come up... (${VMs}x CPU=$CPU RAM=$RAM)" +for ((i=1; i<=VMs; i++)); do + .github/workflows/scripts/qemu-wait-for-vm.sh vm$i +done +echo "All $VMs VMs are up now." diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh index 5ab822f4f076..ca6ac77f146d 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh @@ -111,7 +111,7 @@ fi sudo dmesg -c > dmesg-prerun.txt mount > mount.txt df -h > df-prerun.txt -$TDIR/zfs-tests.sh -vK -s 3GB -T $TAGS +$TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS RV=$? df -h > df-postrun.txt echo $RV > tests-exitcode.txt diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index cda620313189..69349678d84c 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -29,7 +29,7 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" @@ -44,7 +44,7 @@ jobs: os_selection="$FULL_OS" fi - if [ ${{ github.event.inputs.fedora_kernel_ver }} != "" ] ; then + if ${{ github.event.inputs.fedora_kernel_ver != '' }}; then # They specified a custom kernel version for Fedora. Use only # Fedora runners. os_json=$(echo ${os_selection} | jq -c '[.[] | select(startswith("fedora"))]') @@ -53,9 +53,8 @@ jobs: os_json=$(echo ${os_selection} | jq -c) fi - echo $os_json - echo "os=$os_json" >> $GITHUB_OUTPUT - echo "ci_type=$ci_type" >> $GITHUB_OUTPUT + echo "os=$os_json" | tee -a $GITHUB_OUTPUT + echo "ci_type=$ci_type" | tee -a $GITHUB_OUTPUT qemu-vm: name: qemu-x86 @@ -63,8 +62,8 @@ jobs: strategy: fail-fast: false matrix: - # rhl: almalinux8, almalinux9, centos-stream9, fedora41 - # debian: debian11, debian12, ubuntu22, ubuntu24 + # rhl: almalinux8, almalinux9, centos-stream9, fedora4x + # debian: debian12, debian13, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r @@ -78,8 +77,12 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Setup QEMU - timeout-minutes: 10 - run: .github/workflows/scripts/qemu-1-setup.sh + timeout-minutes: 20 + run: | + # Add a timestamp to each line to debug timeouts + while IFS=$'\n' read -r line; do + echo "$(date +'%H:%M:%S') $line" + done < <(.github/workflows/scripts/qemu-1-setup.sh) - name: Start build machine timeout-minutes: 10 diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 1a9c671feac6..bdb7aee48041 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.3.99 +Version: 2.4.99 Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.16 +Linux-Maximum: 6.17 Linux-Minimum: 4.18 diff --git a/sys/contrib/openzfs/Makefile.am b/sys/contrib/openzfs/Makefile.am index 5f09d170e730..30f78e490b78 100644 --- a/sys/contrib/openzfs/Makefile.am +++ b/sys/contrib/openzfs/Makefile.am @@ -1,6 +1,7 @@ CLEANFILES = dist_noinst_DATA = INSTALL_DATA_HOOKS = +INSTALL_EXEC_HOOKS = ALL_LOCAL = CLEAN_LOCAL = CHECKS = shellcheck checkbashisms @@ -71,6 +72,9 @@ all: gitrev PHONY += install-data-hook $(INSTALL_DATA_HOOKS) install-data-hook: $(INSTALL_DATA_HOOKS) +PHONY += install-exec-hook $(INSTALL_EXEC_HOOKS) +install-exec-hook: $(INSTALL_EXEC_HOOKS) + PHONY += maintainer-clean-local maintainer-clean-local: -$(RM) $(GITREV) diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am index 96040976e53e..ca94f6b77e06 100644 --- a/sys/contrib/openzfs/cmd/Makefile.am +++ b/sys/contrib/openzfs/cmd/Makefile.am @@ -98,17 +98,16 @@ endif if USING_PYTHON -bin_SCRIPTS += arc_summary arcstat dbufstat zilstat -CLEANFILES += arc_summary arcstat dbufstat zilstat -dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in +bin_SCRIPTS += zarcsummary zarcstat dbufstat zilstat +CLEANFILES += zarcsummary zarcstat dbufstat zilstat +dist_noinst_DATA += %D%/zarcsummary %D%/zarcstat.in %D%/dbufstat.in %D%/zilstat.in -$(call SUBST,arcstat,%D%/) +$(call SUBST,zarcstat,%D%/) $(call SUBST,dbufstat,%D%/) $(call SUBST,zilstat,%D%/) -arc_summary: %D%/arc_summary +zarcsummary: %D%/zarcsummary $(AM_V_at)cp $< $@ endif - PHONY += cmd cmd: $(bin_SCRIPTS) $(bin_PROGRAMS) $(sbin_SCRIPTS) $(sbin_PROGRAMS) $(dist_bin_SCRIPTS) $(zfsexec_PROGRAMS) $(mounthelper_PROGRAMS) diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/zarcstat.in index 6f9abb39c3fb..8ffd20481166 100755 --- a/sys/contrib/openzfs/cmd/arcstat.in +++ b/sys/contrib/openzfs/cmd/zarcstat.in @@ -2,7 +2,7 @@ # SPDX-License-Identifier: CDDL-1.0 # # Print out ZFS ARC Statistics exported via kstat(1) -# For a definition of fields, or usage, use arcstat -v +# For a definition of fields, or usage, use zarcstat -v # # This script was originally a fork of the original arcstat.pl (0.1) # by Neelakanth Nadgir, originally published on his Sun blog on @@ -56,6 +56,7 @@ import time import getopt import re import copy +import os from signal import signal, SIGINT, SIGWINCH, SIG_DFL @@ -171,7 +172,7 @@ cols = { "zactive": [7, 1000, "zfetch prefetches active per second"], } -# ARC structural breakdown from arc_summary +# ARC structural breakdown from zarcsummary structfields = { "cmp": ["compressed", "Compressed"], "ovh": ["overhead", "Overhead"], @@ -187,7 +188,7 @@ structstats = { # size stats "sz": ["_size", "size"], } -# ARC types breakdown from arc_summary +# ARC types breakdown from zarcsummary typefields = { "data": ["data", "ARC data"], "meta": ["metadata", "ARC metadata"], @@ -198,7 +199,7 @@ typestats = { # size stats "sz": ["_size", "size"], } -# ARC states breakdown from arc_summary +# ARC states breakdown from zarcsummary statefields = { "ano": ["anon", "Anonymous"], "mfu": ["mfu", "MFU"], @@ -261,7 +262,7 @@ hdr_intr = 20 # Print header every 20 lines of output opfile = None sep = " " # Default separator is 2 spaces l2exist = False -cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval " +cmd = ("Usage: zarcstat [-havxp] [-f fields] [-o file] [-s string] [interval " "[count]]\n") cur = {} d = {} @@ -348,10 +349,10 @@ def usage(): "character or string\n") sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n") sys.stderr.write("\nExamples:\n") - sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n") - sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n") - sys.stderr.write("\tarcstat -v\n") - sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n") + sys.stderr.write("\tzarcstat -o /tmp/a.log 2 10\n") + sys.stderr.write("\tzarcstat -s \",\" -o /tmp/a.log 2 10\n") + sys.stderr.write("\tzarcstat -v\n") + sys.stderr.write("\tzarcstat -f time,hit%,dh%,ph%,mh% 1\n") sys.stderr.write("\n") sys.exit(1) @@ -366,7 +367,7 @@ def snap_stats(): cur = kstat - # fill in additional values from arc_summary + # fill in additional values from zarcsummary cur["caches_size"] = caches_size = cur["anon_data"]+cur["anon_metadata"]+\ cur["mfu_data"]+cur["mfu_metadata"]+cur["mru_data"]+cur["mru_metadata"]+\ cur["uncached_data"]+cur["uncached_metadata"] @@ -766,6 +767,7 @@ def calculate(): def main(): + global sint global count global hdr_intr diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/zarcsummary index e60c6b64e8a1..24a129d9ca70 100755 --- a/sys/contrib/openzfs/cmd/arc_summary +++ b/sys/contrib/openzfs/cmd/zarcsummary @@ -34,7 +34,7 @@ Provides basic information on the ARC, its efficiency, the L2ARC (if present), the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See the in-source documentation and code at https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details. -The original introduction to arc_summary can be found at +The original introduction to zarcsummary can be found at http://cuddletech.com/?p=454 """ @@ -161,7 +161,7 @@ elif sys.platform.startswith('linux'): return get_params(TUNABLES_PATH) def get_version_impl(request): - # The original arc_summary called /sbin/modinfo/{spl,zfs} to get + # The original zarcsummary called /sbin/modinfo/{spl,zfs} to get # the version information. We switch to /sys/module/{spl,zfs}/version # to make sure we get what is really loaded in the kernel try: @@ -439,7 +439,7 @@ def print_header(): """ # datetime is now recommended over time but we keep the exact formatting - # from the older version of arc_summary in case there are scripts + # from the older version of zarcsummary in case there are scripts # that expect it in this way daydate = time.strftime(DATE_FORMAT) spc_date = LINE_LENGTH-len(daydate) diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 957e6c67dd12..70a4ed46f263 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -107,7 +107,9 @@ extern uint_t zfs_reconstruct_indirect_combinations_max; extern uint_t zfs_btree_verify_intensity; static const char cmdname[] = "zdb"; -uint8_t dump_opt[256]; +uint8_t dump_opt[512]; + +#define ALLOCATED_OPT 256 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); @@ -383,7 +385,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, sublivelist_verify_block_t svb = {{{0}}}; DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid); DVA_SET_OFFSET(&svb.svb_dva, offset); - DVA_SET_ASIZE(&svb.svb_dva, size); + DVA_SET_ASIZE(&svb.svb_dva, 0); zfs_btree_index_t where; uint64_t end_offset = offset + size; @@ -1586,9 +1588,8 @@ dump_spacemap(objset_t *os, space_map_t *sm) continue; } - uint8_t words; char entry_type; - uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; + uint64_t entry_off, entry_run, entry_vdev; if (sm_entry_is_single_word(word)) { entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? @@ -1596,35 +1597,43 @@ dump_spacemap(objset_t *os, space_map_t *sm) entry_off = (SM_OFFSET_DECODE(word) << mapshift) + sm->sm_start; entry_run = SM_RUN_DECODE(word) << mapshift; - words = 1; + + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run); } else { /* it is a two-word entry so we read another word */ ASSERT(sm_entry_is_double_word(word)); uint64_t extra_word; offset += sizeof (extra_word); + ASSERT3U(offset, <, space_map_length(sm)); VERIFY0(dmu_read(os, space_map_object(sm), offset, sizeof (extra_word), &extra_word, DMU_READ_PREFETCH)); - ASSERT3U(offset, <=, space_map_length(sm)); - entry_run = SM2_RUN_DECODE(word) << mapshift; entry_vdev = SM2_VDEV_DECODE(word); entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? 'A' : 'F'; entry_off = (SM2_OFFSET_DECODE(extra_word) << mapshift) + sm->sm_start; - words = 2; - } - (void) printf("\t [%6llu] %c range:" - " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", - (u_longlong_t)entry_id, - entry_type, (u_longlong_t)entry_off, - (u_longlong_t)(entry_off + entry_run), - (u_longlong_t)entry_run, - (u_longlong_t)entry_vdev, words); + if (zopt_metaslab_args == 0 || + zopt_metaslab[0] == entry_vdev) { + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx " + "vdev: %llu\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run, + (u_longlong_t)entry_vdev); + } + } if (entry_type == 'A') alloc += entry_run; @@ -1660,6 +1669,16 @@ dump_metaslab_stats(metaslab_t *msp) } static void +dump_allocated(void *arg, uint64_t start, uint64_t size) +{ + uint64_t *off = arg; + if (*off != start) + (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", *off, + start - *off); + *off = start + size; +} + +static void dump_metaslab(metaslab_t *msp) { vdev_t *vd = msp->ms_group->mg_vd; @@ -1675,13 +1694,24 @@ dump_metaslab(metaslab_t *msp) (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, (u_longlong_t)space_map_object(sm), freebuf); - if (dump_opt['m'] > 2 && !dump_opt['L']) { + if (dump_opt[ALLOCATED_OPT] || + (dump_opt['m'] > 2 && !dump_opt['L'])) { mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); + } + + if (dump_opt['m'] > 2 && !dump_opt['L']) { zfs_range_tree_stat_verify(msp->ms_allocatable); dump_metaslab_stats(msp); - metaslab_unload(msp); - mutex_exit(&msp->ms_lock); + } + + if (dump_opt[ALLOCATED_OPT]) { + uint64_t off = msp->ms_start; + zfs_range_tree_walk(msp->ms_allocatable, dump_allocated, + &off); + if (off != msp->ms_start + msp->ms_size) + (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off, + msp->ms_size - off); } if (dump_opt['m'] > 1 && sm != NULL && @@ -1696,6 +1726,12 @@ dump_metaslab(metaslab_t *msp) SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } + if (dump_opt[ALLOCATED_OPT] || + (dump_opt['m'] > 2 && !dump_opt['L'])) { + metaslab_unload(msp); + mutex_exit(&msp->ms_lock); + } + if (vd->vdev_ops == &vdev_draid_ops) ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift); else @@ -1732,8 +1768,9 @@ print_vdev_metaslab_header(vdev_t *vd) } } - (void) printf("\tvdev %10llu %s", - (u_longlong_t)vd->vdev_id, bias_str); + (void) printf("\tvdev %10llu\t%s metaslab shift %4llu", + (u_longlong_t)vd->vdev_id, bias_str, + (u_longlong_t)vd->vdev_ms_shift); if (ms_flush_data_obj != 0) { (void) printf(" ms_unflushed_phys object %llu", @@ -1873,7 +1910,7 @@ dump_metaslabs(spa_t *spa) (void) printf("\nMetaslabs:\n"); - if (!dump_opt['d'] && zopt_metaslab_args > 0) { + if (zopt_metaslab_args > 0) { c = zopt_metaslab[0]; if (c >= children) @@ -2627,8 +2664,8 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, } if (BP_GET_LEVEL(bp) != zb->zb_level) { (void) printf(" (ERROR: Block pointer level " - "(%llu) does not match bookmark level (%ld))", - BP_GET_LEVEL(bp), zb->zb_level); + "(%llu) does not match bookmark level (%lld))", + BP_GET_LEVEL(bp), (longlong_t)zb->zb_level); corruption_found = B_TRUE; } } @@ -2681,8 +2718,8 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, if (fill != BP_GET_FILL(bp)) { (void) printf("%16llx: Block pointer " "fill (%llu) does not match calculated " - "value (%lu)\n", offset, BP_GET_FILL(bp), - fill); + "value (%llu)\n", offset, BP_GET_FILL(bp), + (u_longlong_t)fill); corruption_found = B_TRUE; } } @@ -3264,6 +3301,7 @@ zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out) uint64_t keyformat, salt, iters; int i; unsigned char c; + FILE *f; VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), sizeof (uint64_t), @@ -3296,6 +3334,25 @@ zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out) break; + case ZFS_KEYFORMAT_RAW: + if ((f = fopen(key_material, "r")) == NULL) + return (B_FALSE); + + if (fread(key_out, 1, WRAPPING_KEY_LEN, f) != + WRAPPING_KEY_LEN) { + (void) fclose(f); + return (B_FALSE); + } + + /* Check the key length */ + if (fgetc(f) != EOF) { + (void) fclose(f); + return (B_FALSE); + } + + (void) fclose(f); + break; + default: fatal("no support for key format %u\n", (unsigned int) keyformat); @@ -9368,6 +9425,8 @@ main(int argc, char **argv) {"all-reconstruction", no_argument, NULL, 'Y'}, {"livelist", no_argument, NULL, 'y'}, {"zstd-headers", no_argument, NULL, 'Z'}, + {"allocated-map", no_argument, NULL, + ALLOCATED_OPT}, {0, 0, 0, 0} }; @@ -9398,6 +9457,7 @@ main(int argc, char **argv) case 'u': case 'y': case 'Z': + case ALLOCATED_OPT: dump_opt[c]++; dump_all = 0; break; diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.h b/sys/contrib/openzfs/cmd/zdb/zdb.h index 6b6c9169816b..48b561eb202c 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.h +++ b/sys/contrib/openzfs/cmd/zdb/zdb.h @@ -29,6 +29,6 @@ #define _ZDB_H void dump_intent_log(zilog_t *); -extern uint8_t dump_opt[256]; +extern uint8_t dump_opt[512]; #endif /* _ZDB_H */ diff --git a/sys/contrib/openzfs/cmd/zdb/zdb_il.c b/sys/contrib/openzfs/cmd/zdb/zdb_il.c index 62e290cd122c..3d91fb28a4c7 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb_il.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb_il.c @@ -48,8 +48,6 @@ #include "zdb.h" -extern uint8_t dump_opt[256]; - static char tab_prefix[4] = "\t\t\t"; static void diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am index 093a04c4636a..c0b161ecf248 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am +++ b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am @@ -9,18 +9,18 @@ dist_zedexec_SCRIPTS = \ %D%/all-debug.sh \ %D%/all-syslog.sh \ %D%/data-notify.sh \ - %D%/deadman-slot_off.sh \ + %D%/deadman-sync-slot_off.sh \ %D%/generic-notify.sh \ - %D%/pool_import-led.sh \ + %D%/pool_import-sync-led.sh \ %D%/resilver_finish-notify.sh \ %D%/resilver_finish-start-scrub.sh \ %D%/scrub_finish-notify.sh \ - %D%/statechange-led.sh \ + %D%/statechange-sync-led.sh \ %D%/statechange-notify.sh \ - %D%/statechange-slot_off.sh \ + %D%/statechange-sync-slot_off.sh \ %D%/trim_finish-notify.sh \ - %D%/vdev_attach-led.sh \ - %D%/vdev_clear-led.sh + %D%/vdev_attach-sync-led.sh \ + %D%/vdev_clear-sync-led.sh nodist_zedexec_SCRIPTS = \ %D%/history_event-zfs-list-cacher.sh @@ -30,17 +30,17 @@ SUBSTFILES += $(nodist_zedexec_SCRIPTS) zedconfdefaults = \ all-syslog.sh \ data-notify.sh \ - deadman-slot_off.sh \ + deadman-sync-slot_off.sh \ history_event-zfs-list-cacher.sh \ - pool_import-led.sh \ + pool_import-sync-led.sh \ resilver_finish-notify.sh \ resilver_finish-start-scrub.sh \ scrub_finish-notify.sh \ - statechange-led.sh \ + statechange-sync-led.sh \ statechange-notify.sh \ - statechange-slot_off.sh \ - vdev_attach-led.sh \ - vdev_clear-led.sh + statechange-sync-slot_off.sh \ + vdev_attach-sync-led.sh \ + vdev_clear-sync-led.sh dist_noinst_DATA += %D%/README diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh index 7b339b3add01..7b339b3add01 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh deleted file mode 120000 index 7d7404398a4a..000000000000 --- a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh +++ /dev/null @@ -1 +0,0 @@ -statechange-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh new file mode 120000 index 000000000000..8b9c10c11ebb --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh @@ -0,0 +1 @@ +statechange-sync-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh index 40cb61f17307..40cb61f17307 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh index 06acce93b8aa..06acce93b8aa 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh deleted file mode 120000 index 7d7404398a4a..000000000000 --- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh +++ /dev/null @@ -1 +0,0 @@ -statechange-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh new file mode 120000 index 000000000000..8b9c10c11ebb --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh @@ -0,0 +1 @@ +statechange-sync-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh deleted file mode 120000 index 7d7404398a4a..000000000000 --- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh +++ /dev/null @@ -1 +0,0 @@ -statechange-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh new file mode 120000 index 000000000000..8b9c10c11ebb --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh @@ -0,0 +1 @@ +statechange-sync-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed_exec.c b/sys/contrib/openzfs/cmd/zed/zed_exec.c index 036081decd64..a14af4f20a85 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_exec.c +++ b/sys/contrib/openzfs/cmd/zed/zed_exec.c @@ -196,37 +196,29 @@ _nop(int sig) (void) sig; } -static void * -_reap_children(void *arg) +static void +wait_for_children(boolean_t do_pause, boolean_t wait) { - (void) arg; - struct launched_process_node node, *pnode; pid_t pid; - int status; struct rusage usage; - struct sigaction sa = {}; - - (void) sigfillset(&sa.sa_mask); - (void) sigdelset(&sa.sa_mask, SIGCHLD); - (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL); - - (void) sigemptyset(&sa.sa_mask); - sa.sa_handler = _nop; - sa.sa_flags = SA_NOCLDSTOP; - (void) sigaction(SIGCHLD, &sa, NULL); + int status; + struct launched_process_node node, *pnode; for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) { (void) pthread_mutex_lock(&_launched_processes_lock); - pid = wait4(0, &status, WNOHANG, &usage); - + pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage); if (pid == 0 || pid == (pid_t)-1) { (void) pthread_mutex_unlock(&_launched_processes_lock); - if (pid == 0 || errno == ECHILD) - pause(); - else if (errno != EINTR) + if ((pid == 0) || (errno == ECHILD)) { + if (do_pause) + pause(); + } else if (errno != EINTR) zed_log_msg(LOG_WARNING, "Failed to wait for children: %s", strerror(errno)); + if (!do_pause) + return; + } else { memset(&node, 0, sizeof (node)); node.pid = pid; @@ -278,6 +270,25 @@ _reap_children(void *arg) } } +} + +static void * +_reap_children(void *arg) +{ + (void) arg; + struct sigaction sa = {}; + + (void) sigfillset(&sa.sa_mask); + (void) sigdelset(&sa.sa_mask, SIGCHLD); + (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL); + + (void) sigemptyset(&sa.sa_mask); + sa.sa_handler = _nop; + sa.sa_flags = SA_NOCLDSTOP; + (void) sigaction(SIGCHLD, &sa, NULL); + + wait_for_children(B_TRUE, B_FALSE); + return (NULL); } @@ -307,6 +318,45 @@ zed_exec_fini(void) } /* + * Check if the zedlet name indicates if it is a synchronous zedlet + * + * Synchronous zedlets have a "-sync-" immediately following the event name in + * their zedlet filename, like: + * + * EVENT_NAME-sync-ZEDLETNAME.sh + * + * For example, if you wanted a synchronous statechange script: + * + * statechange-sync-myzedlet.sh + * + * Synchronous zedlets are guaranteed to be the only zedlet running. No other + * zedlets may run in parallel with a synchronous zedlet. A synchronous + * zedlet will wait for all previously spawned zedlets to finish before running. + * Users should be careful to only use synchronous zedlets when needed, since + * they decrease parallelism. + */ +static boolean_t +zedlet_is_sync(const char *zedlet, const char *event) +{ + const char *sync_str = "-sync-"; + size_t sync_str_len; + size_t zedlet_len; + size_t event_len; + + sync_str_len = strlen(sync_str); + zedlet_len = strlen(zedlet); + event_len = strlen(event); + + if (event_len + sync_str_len >= zedlet_len) + return (B_FALSE); + + if (strncmp(&zedlet[event_len], sync_str, sync_str_len) == 0) + return (B_TRUE); + + return (B_FALSE); +} + +/* * Process the event [eid] by synchronously invoking all zedlets with a * matching class prefix. * @@ -368,9 +418,28 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass, z = zed_strings_next(zcp->zedlets)) { for (csp = class_strings; *csp; csp++) { n = strlen(*csp); - if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) + if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) { + boolean_t is_sync = zedlet_is_sync(z, *csp); + + if (is_sync) { + /* + * Wait for previous zedlets to + * finish + */ + wait_for_children(B_FALSE, B_TRUE); + } + _zed_exec_fork_child(eid, zcp->zedlet_dir, z, e, zcp->zevent_fd, zcp->do_foreground); + + if (is_sync) { + /* + * Wait for sync zedlet we just launched + * to finish. + */ + wait_for_children(B_FALSE, B_TRUE); + } + } } } free(e); diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 235f011af953..ccdd5ffef8e6 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -914,7 +914,11 @@ zfs_do_clone(int argc, char **argv) log_history = B_FALSE; } - ret = zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET); + /* + * Dataset cloned successfully, mount/share failures are + * non-fatal. + */ + (void) zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET); } zfs_close(zhp); @@ -930,19 +934,15 @@ usage: } /* - * Return a default volblocksize for the pool which always uses more than - * half of the data sectors. This primarily applies to dRAID which always - * writes full stripe widths. + * Calculate the minimum allocation size based on the top-level vdevs. */ static uint64_t -default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) +calculate_volblocksize(nvlist_t *config) { - uint64_t volblocksize, asize = SPA_MINBLOCKSIZE; + uint64_t asize = SPA_MINBLOCKSIZE; nvlist_t *tree, **vdevs; uint_t nvdevs; - nvlist_t *config = zpool_get_config(zhp, NULL); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &vdevs, &nvdevs) != 0) { @@ -973,6 +973,24 @@ default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) } } + return (asize); +} + +/* + * Return a default volblocksize for the pool which always uses more than + * half of the data sectors. This primarily applies to dRAID which always + * writes full stripe widths. + */ +static uint64_t +default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) +{ + uint64_t volblocksize, asize = SPA_MINBLOCKSIZE; + + nvlist_t *config = zpool_get_config(zhp, NULL); + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_MAX_ALLOC, &asize) != 0) + asize = calculate_volblocksize(config); + /* * Calculate the target volblocksize such that more than half * of the asize is used. The following table is for 4k sectors. @@ -1319,7 +1337,9 @@ zfs_do_create(int argc, char **argv) goto error; } - ret = zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET); + /* Dataset created successfully, mount/share failures are non-fatal */ + ret = 0; + (void) zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET); error: nvlist_free(props); return (ret); @@ -5303,6 +5323,7 @@ zfs_do_receive(int argc, char **argv) #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_SEND_RAW "send:raw" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append" #define ZFS_DELEG_PERM_ALLOW "allow" @@ -5345,6 +5366,7 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, + { ZFS_DELEG_PERM_SEND_RAW, ZFS_DELEG_NOTE_SEND_RAW }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, @@ -5929,6 +5951,10 @@ deleg_perm_comment(zfs_deleg_note_t note) case ZFS_DELEG_NOTE_SEND: str = gettext(""); break; + case ZFS_DELEG_NOTE_SEND_RAW: + str = gettext("Allow sending ONLY encrypted (raw) replication" + "\n\t\t\t\tstreams"); + break; case ZFS_DELEG_NOTE_SHARE: str = gettext("Allows sharing file systems over NFS or SMB" "\n\t\t\t\tprotocols"); @@ -6858,17 +6884,17 @@ print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl, if (scripted) { if (parsable) { - (void) printf("%s\t%s\t%ld\n", zname, - tagname, (unsigned long)time); + (void) printf("%s\t%s\t%lld\n", zname, + tagname, (long long)time); } else { (void) printf("%s\t%s\t%s\n", zname, tagname, tsbuf); } } else { if (parsable) { - (void) printf("%-*s %-*s %ld\n", + (void) printf("%-*s %-*s %lld\n", nwidth, zname, tagwidth, - tagname, (unsigned long)time); + tagname, (long long)time); } else { (void) printf("%-*s %-*s %s\n", nwidth, zname, tagwidth, diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c index 2bd3051dce7b..8ffbf91ffb30 100644 --- a/sys/contrib/openzfs/cmd/zhack.c +++ b/sys/contrib/openzfs/cmd/zhack.c @@ -54,6 +54,7 @@ #include <sys/dmu_tx.h> #include <zfeature_common.h> #include <libzutil.h> +#include <sys/metaslab_impl.h> static importargs_t g_importargs; static char *g_pool; @@ -69,7 +70,8 @@ static __attribute__((noreturn)) void usage(void) { (void) fprintf(stderr, - "Usage: zhack [-c cachefile] [-d dir] <subcommand> <args> ...\n" + "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> " + "<args> ...\n" "where <subcommand> <args> is one of the following:\n" "\n"); @@ -93,7 +95,10 @@ usage(void) " -c repair corrupted label checksums\n" " -u restore the label on a detached device\n" "\n" - " <device> : path to vdev\n"); + " <device> : path to vdev\n" + "\n" + " metaslab leak <pool>\n" + " apply allocation map from zdb to specified pool\n"); exit(1); } @@ -363,10 +368,12 @@ feature_incr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; + mutex_enter(&spa->spa_feat_stats_lock); VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount + 1, tx); spa_history_log_internal(spa, "zhack feature incr", tx, "name=%s", feature->fi_guid); + mutex_exit(&spa->spa_feat_stats_lock); } static void @@ -376,10 +383,12 @@ feature_decr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; + mutex_enter(&spa->spa_feat_stats_lock); VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount - 1, tx); spa_history_log_internal(spa, "zhack feature decr", tx, "name=%s", feature->fi_guid); + mutex_exit(&spa->spa_feat_stats_lock); } static void @@ -496,6 +505,186 @@ zhack_do_feature(int argc, char **argv) return (0); } +static boolean_t +strstarts(const char *a, const char *b) +{ + return (strncmp(a, b, strlen(b)) == 0); +} + +static void +metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size, + dmu_tx_t *tx) +{ + ASSERT(msp->ms_disabled); + ASSERT(MUTEX_HELD(&msp->ms_lock)); + uint64_t txg = dmu_tx_get_txg(tx); + + uint64_t off = start; + while (off < start + size) { + uint64_t ostart, osize; + boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable, + off, start + size - off, &ostart, &osize); + if (!found) + break; + zfs_range_tree_remove(msp->ms_allocatable, ostart, osize); + + if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) + vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp, + txg); + + zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart, + osize); + msp->ms_allocating_total += osize; + off = ostart + osize; + } +} + +static void +zhack_do_metaslab_leak(int argc, char **argv) +{ + int c; + char *target; + spa_t *spa; + + optind = 1; + boolean_t force = B_FALSE; + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, "error: missing pool name\n"); + usage(); + } + target = argv[0]; + + zhack_spa_open(target, B_FALSE, FTAG, &spa); + spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER); + + char *line = NULL; + size_t cap = 0; + + vdev_t *vd = NULL; + metaslab_t *prev = NULL; + dmu_tx_t *tx = NULL; + while (getline(&line, &cap, stdin) > 0) { + if (strstarts(line, "\tvdev ")) { + uint64_t vdev_id, ms_shift; + if (sscanf(line, + "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64, + &vdev_id, &ms_shift) == 1) { + VERIFY3U(sscanf(line, "\tvdev %"PRIu64 + "\t metaslab shift %4"PRIu64, + &vdev_id, &ms_shift), ==, 2); + } + vd = vdev_lookup_top(spa, vdev_id); + if (vd == NULL) { + fprintf(stderr, "error: no such vdev with " + "id %"PRIu64"\n", vdev_id); + break; + } + if (tx) { + dmu_tx_commit(tx); + mutex_exit(&prev->ms_lock); + metaslab_enable(prev, B_FALSE, B_FALSE); + tx = NULL; + prev = NULL; + } + if (vd->vdev_ms_shift != ms_shift) { + fprintf(stderr, "error: ms_shift mismatch: %" + PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift, + ms_shift); + break; + } + } else if (strstarts(line, "\tmetaslabs ")) { + uint64_t ms_count; + VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count), + ==, 1); + ASSERT(vd); + if (!force && vd->vdev_ms_count != ms_count) { + fprintf(stderr, "error: ms_count mismatch: %" + PRIu64" != %"PRIu64"\n", vd->vdev_ms_count, + ms_count); + break; + } + } else if (strstarts(line, "ALLOC:")) { + uint64_t start, size; + VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n", + &start, &size), ==, 2); + + ASSERT(vd); + metaslab_t *cur = + vd->vdev_ms[start >> vd->vdev_ms_shift]; + if (prev != cur) { + if (prev) { + dmu_tx_commit(tx); + mutex_exit(&prev->ms_lock); + metaslab_enable(prev, B_FALSE, B_FALSE); + } + ASSERT(cur); + metaslab_disable(cur); + mutex_enter(&cur->ms_lock); + metaslab_load(cur); + prev = cur; + tx = dmu_tx_create_dd( + spa_get_dsl(vd->vdev_spa)->dp_root_dir); + dmu_tx_assign(tx, DMU_TX_WAIT); + } + + metaslab_force_alloc(cur, start, size, tx); + } else { + continue; + } + } + if (tx) { + dmu_tx_commit(tx); + mutex_exit(&prev->ms_lock); + metaslab_enable(prev, B_FALSE, B_FALSE); + tx = NULL; + prev = NULL; + } + if (line) + free(line); + + spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG); + spa_close(spa, FTAG); +} + +static int +zhack_do_metaslab(int argc, char **argv) +{ + char *subcommand; + + argc--; + argv++; + if (argc == 0) { + (void) fprintf(stderr, + "error: no metaslab operation specified\n"); + usage(); + } + + subcommand = argv[0]; + if (strcmp(subcommand, "leak") == 0) { + zhack_do_metaslab_leak(argc, argv); + } else { + (void) fprintf(stderr, "error: unknown subcommand: %s\n", + subcommand); + usage(); + } + + return (0); +} + #define ASHIFT_UBERBLOCK_SHIFT(ashift) \ MIN(MAX(ashift, UBERBLOCK_SHIFT), \ MAX_UBERBLOCK_SHIFT) @@ -525,6 +714,23 @@ zhack_repair_read_label(const int fd, vdev_label_t *vl, return (0); } +static int +zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap) +{ + if (vdev_eck->zec_magic == ZEC_MAGIC) { + *byteswap = B_FALSE; + } else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) { + *byteswap = B_TRUE; + } else { + (void) fprintf(stderr, "error: label %d: " + "Expected the nvlist checksum magic number but instead got " + "0x%" PRIx64 "\n", + l, vdev_eck->zec_magic); + return (1); + } + return (0); +} + static void zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum) @@ -551,33 +757,10 @@ zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, } static int -zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys, - const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg, - uint64_t *ashift) +zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift) { int err; - - if (ub->ub_txg != 0) { - (void) fprintf(stderr, - "error: label %d: UB TXG of 0 expected, but got %" - PRIu64 "\n", - l, ub->ub_txg); - (void) fprintf(stderr, "It would appear the device was not " - "properly removed.\n"); - return (1); - } - - for (int i = 0; i < cfg_keys_len; i++) { - uint64_t val; - err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val); - if (err) { - (void) fprintf(stderr, - "error: label %d, %d: " - "cannot find nvlist key %s\n", - l, i, cfg_keys[i]); - return (err); - } - } + nvlist_t *vdev_tree_cfg; err = nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg); @@ -601,7 +784,7 @@ zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys, (void) fprintf(stderr, "error: label %d: nvlist key %s is zero\n", l, ZPOOL_CONFIG_ASHIFT); - return (err); + return (1); } return (0); @@ -616,30 +799,35 @@ zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l) */ if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) { const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp); + int err; + ub->ub_txg = txg; - if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) { + err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG); + if (err) { (void) fprintf(stderr, "error: label %d: " "Failed to remove pool creation TXG\n", l); - return (1); + return (err); } - if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) { + err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG); + if (err) { (void) fprintf(stderr, "error: label %d: Failed to remove pool TXG to " "be replaced.\n", l); - return (1); + return (err); } - if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) { + err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg); + if (err) { (void) fprintf(stderr, "error: label %d: " "Failed to add pool TXG of %" PRIu64 "\n", l, txg); - return (1); + return (err); } } @@ -733,6 +921,7 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data, BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; const uint64_t actual_magic = vdev_eck->zec_magic; int err = 0; + if (actual_magic != expected_magic) { (void) fprintf(stderr, "error: label %d: " "Expected " @@ -754,6 +943,36 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data, return (err); } +static int +zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg) +{ + const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, + ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; + int err; + + err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, + VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0); + if (err) { + (void) fprintf(stderr, + "error: cannot unpack nvlist label %d\n", l); + return (err); + } + + for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) { + uint64_t val; + err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val); + if (err) { + (void) fprintf(stderr, + "error: label %d, %d: " + "cannot find nvlist key %s\n", + l, i, cfg_keys[i]); + return (err); + } + } + + return (0); +} + static void zhack_repair_one_label(const zhack_repair_op_t op, const int fd, vdev_label_t *vl, const uint64_t label_offset, const int l, @@ -767,10 +986,7 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1; const uint64_t vdev_phys_offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys); - const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, - ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; nvlist_t *cfg; - nvlist_t *vdev_tree_cfg = NULL; uint64_t ashift; int byteswap; @@ -778,18 +994,9 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, if (err) return; - if (vdev_eck->zec_magic == 0) { - (void) fprintf(stderr, "error: label %d: " - "Expected the nvlist checksum magic number to not be zero" - "\n", - l); - (void) fprintf(stderr, "There should already be a checksum " - "for the label.\n"); + err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap); + if (err) return; - } - - byteswap = - (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)); if (byteswap) { byteswap_uint64_array(&vdev_eck->zec_cksum, @@ -805,16 +1012,7 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, return; } - err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, - VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0); - if (err) { - (void) fprintf(stderr, - "error: cannot unpack nvlist label %d\n", l); - return; - } - - err = zhack_repair_check_label(ub, - l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift); + err = zhack_repair_unpack_cfg(vl, l, &cfg); if (err) return; @@ -822,6 +1020,19 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, char *buf; size_t buflen; + if (ub->ub_txg != 0) { + (void) fprintf(stderr, + "error: label %d: UB TXG of 0 expected, but got %" + PRIu64 "\n", l, ub->ub_txg); + (void) fprintf(stderr, "It would appear the device was " + "not properly detached.\n"); + return; + } + + err = zhack_repair_get_ashift(cfg, l, &ashift); + if (err) + return; + err = zhack_repair_undetach(ub, cfg, l); if (err) return; @@ -981,7 +1192,7 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); zfs_prop_init(); - while ((c = getopt(argc, argv, "+c:d:")) != -1) { + while ((c = getopt(argc, argv, "+c:d:o:")) != -1) { switch (c) { case 'c': g_importargs.cachefile = optarg; @@ -990,6 +1201,10 @@ main(int argc, char **argv) assert(g_importargs.paths < MAX_NUM_PATHS); g_importargs.path[g_importargs.paths++] = optarg; break; + case 'o': + if (handle_tunable_option(optarg, B_FALSE) != 0) + exit(1); + break; default: usage(); break; @@ -1011,6 +1226,8 @@ main(int argc, char **argv) rv = zhack_do_feature(argc, argv); } else if (strcmp(subcommand, "label") == 0) { return (zhack_do_label(argc, argv)); + } else if (strcmp(subcommand, "metaslab") == 0) { + rv = zhack_do_metaslab(argc, argv); } else { (void) fprintf(stderr, "error: unknown subcommand: %s\n", subcommand); diff --git a/sys/contrib/openzfs/cmd/zinject/zinject.c b/sys/contrib/openzfs/cmd/zinject/zinject.c index 113797c878b9..c2f646f2567d 100644 --- a/sys/contrib/openzfs/cmd/zinject/zinject.c +++ b/sys/contrib/openzfs/cmd/zinject/zinject.c @@ -107,6 +107,8 @@ * zinject * zinject <-a | -u pool> * zinject -c <id|all> + * zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range] + * [-T iotype] [-t type object | -b bookmark pool] * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] * [-r range] <object> * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool @@ -132,14 +134,18 @@ * The '-f' flag controls the frequency of errors injected, expressed as a * real number percentage between 0.0001 and 100. The default is 100. * - * The this form is responsible for actually injecting the handler into the + * The <object> form is responsible for actually injecting the handler into the * framework. It takes the arguments described above, translates them to the * internal tuple using libzpool, and then issues an ioctl() to register the * handler. * - * The final form can target a specific bookmark, regardless of whether a + * The '-b' option can target a specific bookmark, regardless of whether a * human-readable interface has been designed. It allows developers to specify * a particular block by number. + * + * The '-E' option injects pipeline ready stage delays for the given object or + * bookmark. The delay is specified in milliseconds, and it supports I/O type + * and range filters. */ #include <errno.h> @@ -346,6 +352,13 @@ usage(void) "\t\tsuch that the operation takes a minimum of supplied seconds\n" "\t\tto complete.\n" "\n" + "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n" + "\t\t[-T iotype] [-t type object | -b bookmark pool]\n" + "\n" + "\t\tInject pipeline ready stage delays for the given object path\n" + "\t\t(data or dnode) or raw bookmark. The delay is specified in\n" + "\t\tmilliseconds.\n" + "\n" "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" "\t\tCause the pool to stop writing blocks yet not\n" "\t\treport errors for a duration. Simulates buggy hardware\n" @@ -724,12 +737,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record, if (quiet) { (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); } else { + boolean_t show_object = B_FALSE; + boolean_t show_iotype = B_FALSE; (void) printf("Added handler %llu with the following " "properties:\n", (u_longlong_t)zc.zc_guid); (void) printf(" pool: %s\n", pool); if (record->zi_guid) { (void) printf(" vdev: %llx\n", (u_longlong_t)record->zi_guid); + show_iotype = B_TRUE; } else if (record->zi_func[0] != '\0') { (void) printf(" panic function: %s\n", record->zi_func); @@ -742,7 +758,18 @@ register_handler(const char *pool, int flags, zinject_record_t *record, } else if (record->zi_timer > 0) { (void) printf(" timer: %lld ms\n", (u_longlong_t)NSEC2MSEC(record->zi_timer)); + if (record->zi_cmd == ZINJECT_DELAY_READY) { + show_object = B_TRUE; + show_iotype = B_TRUE; + } } else { + show_object = B_TRUE; + } + if (show_iotype) { + (void) printf("iotype: %s\n", + iotype_to_str(record->zi_iotype)); + } + if (show_object) { (void) printf("objset: %llu\n", (u_longlong_t)record->zi_objset); (void) printf("object: %llu\n", @@ -910,6 +937,7 @@ main(int argc, char **argv) int ret; int flags = 0; uint32_t dvas = 0; + hrtime_t ready_delay = -1; if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); @@ -940,7 +968,7 @@ main(int argc, char **argv) } while ((c = getopt(argc, argv, - ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) { + ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) { switch (c) { case 'a': flags |= ZINJECT_FLUSH_ARC; @@ -1113,6 +1141,18 @@ main(int argc, char **argv) case 'u': flags |= ZINJECT_UNLOAD_SPA; break; + case 'E': + ready_delay = MSEC2NSEC(strtol(optarg, &end, 10)); + if (ready_delay <= 0 || *end != '\0') { + (void) fprintf(stderr, "invalid delay '%s': " + "must be a positive duration\n", optarg); + usage(); + libzfs_fini(g_zfs); + return (1); + } + record.zi_cmd = ZINJECT_DELAY_READY; + record.zi_timer = ready_delay; + break; case 'L': if ((label = name_to_type(optarg)) == TYPE_INVAL && !LABEL_TYPE(type)) { @@ -1150,7 +1190,7 @@ main(int argc, char **argv) */ if (raw != NULL || range != NULL || type != TYPE_INVAL || level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || - record.zi_freq > 0 || dvas != 0) { + record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) { (void) fprintf(stderr, "cancel (-c) incompatible with " "any other options\n"); usage(); @@ -1186,7 +1226,7 @@ main(int argc, char **argv) */ if (raw != NULL || range != NULL || type != TYPE_INVAL || level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || - dvas != 0) { + dvas != 0 || ready_delay >= 0) { (void) fprintf(stderr, "device (-d) incompatible with " "data error injection\n"); usage(); @@ -1276,13 +1316,23 @@ main(int argc, char **argv) return (1); } - record.zi_cmd = ZINJECT_DATA_FAULT; + if (record.zi_cmd == ZINJECT_UNINITIALIZED) { + record.zi_cmd = ZINJECT_DATA_FAULT; + if (!error) + error = EIO; + } else if (error != 0) { + (void) fprintf(stderr, "error type -e incompatible " + "with delay injection\n"); + libzfs_fini(g_zfs); + return (1); + } else { + record.zi_iotype = io_type; + } + if (translate_raw(raw, &record) != 0) { libzfs_fini(g_zfs); return (1); } - if (!error) - error = EIO; } else if (record.zi_cmd == ZINJECT_PANIC) { if (raw != NULL || range != NULL || type != TYPE_INVAL || level != 0 || device != NULL || record.zi_freq > 0 || @@ -1410,6 +1460,13 @@ main(int argc, char **argv) record.zi_dvas = dvas; } + if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) { + (void) fprintf(stderr, "error type -e incompatible " + "with delay injection\n"); + libzfs_fini(g_zfs); + return (1); + } + if (error == EACCES) { if (type != TYPE_DATA) { (void) fprintf(stderr, "decryption errors " @@ -1425,8 +1482,12 @@ main(int argc, char **argv) * not found. */ error = ECKSUM; - } else { + } else if (record.zi_cmd == ZINJECT_UNINITIALIZED) { record.zi_cmd = ZINJECT_DATA_FAULT; + if (!error) + error = EIO; + } else { + record.zi_iotype = io_type; } if (translate_record(type, argv[0], range, level, &record, pool, @@ -1434,8 +1495,6 @@ main(int argc, char **argv) libzfs_fini(g_zfs); return (1); } - if (!error) - error = EIO; } /* diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am index 2f962408e5a3..5bb6d8160b18 100644 --- a/sys/contrib/openzfs/cmd/zpool/Makefile.am +++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am @@ -148,6 +148,7 @@ dist_zpoolcompat_DATA = \ %D%/compatibility.d/openzfs-2.1-linux \ %D%/compatibility.d/openzfs-2.2 \ %D%/compatibility.d/openzfs-2.3 \ + %D%/compatibility.d/openzfs-2.4 \ %D%/compatibility.d/openzfsonosx-1.7.0 \ %D%/compatibility.d/openzfsonosx-1.8.1 \ %D%/compatibility.d/openzfsonosx-1.9.3 \ @@ -187,7 +188,9 @@ zpoolcompatlinks = \ "openzfs-2.2 openzfs-2.2-linux" \ "openzfs-2.2 openzfs-2.2-freebsd" \ "openzfs-2.3 openzfs-2.3-linux" \ - "openzfs-2.3 openzfs-2.3-freebsd" + "openzfs-2.3 openzfs-2.3-freebsd" \ + "openzfs-2.4 openzfs-2.4-linux" \ + "openzfs-2.4 openzfs-2.4-freebsd" zpoolconfdir = $(sysconfdir)/zfs/zpool.d INSTALL_DATA_HOOKS += zpool-install-data-hook diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 new file mode 100644 index 000000000000..3fbd91014c95 --- /dev/null +++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 @@ -0,0 +1,48 @@ +# Features supported by OpenZFS 2.4 on Linux and FreeBSD +allocation_classes +async_destroy +blake3 +block_cloning +block_cloning_endian +bookmark_v2 +bookmark_written +bookmarks +device_rebuild +device_removal +draid +dynamic_gang_header +edonr +embedded_data +empty_bpobj +enabled_txg +encryption +extensible_dataset +fast_dedup +filesystem_limits +head_errlog +hole_birth +large_blocks +large_dnode +large_microzap +livelist +log_spacemap +longname +lz4_compress +multi_vdev_crash_dump +obsolete_counts +physical_rewrite +project_quota +raidz_expansion +redacted_datasets +redaction_bookmarks +redaction_list_spill +resilver_defer +sha512 +skein +spacemap_histogram +spacemap_v2 +userobj_accounting +vdev_zaps_v2 +zilsaxattr +zpool_checkpoint +zstd_compress diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c index 2eec9a95e24c..fef602736705 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c @@ -26,6 +26,7 @@ /* * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. + * Copyright (c) 2025, Klara, Inc. */ #include <libintl.h> @@ -52,7 +53,7 @@ typedef struct zpool_node { zpool_handle_t *zn_handle; uu_avl_node_t zn_avlnode; - int zn_mark; + hrtime_t zn_last_refresh; } zpool_node_t; struct zpool_list { @@ -62,6 +63,7 @@ struct zpool_list { uu_avl_pool_t *zl_pool; zprop_list_t **zl_proplist; zfs_type_t zl_type; + hrtime_t zl_last_refresh; }; static int @@ -81,26 +83,30 @@ zpool_compare(const void *larg, const void *rarg, void *unused) * of known pools. */ static int -add_pool(zpool_handle_t *zhp, void *data) +add_pool(zpool_handle_t *zhp, zpool_list_t *zlp) { - zpool_list_t *zlp = data; - zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); + zpool_node_t *node, *new = safe_malloc(sizeof (zpool_node_t)); uu_avl_index_t idx; - node->zn_handle = zhp; - uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); - if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { + new->zn_handle = zhp; + uu_avl_node_init(new, &new->zn_avlnode, zlp->zl_pool); + + node = uu_avl_find(zlp->zl_avl, new, NULL, &idx); + if (node == NULL) { if (zlp->zl_proplist && zpool_expand_proplist(zhp, zlp->zl_proplist, zlp->zl_type, zlp->zl_literal) != 0) { zpool_close(zhp); - free(node); + free(new); return (-1); } - uu_avl_insert(zlp->zl_avl, node, idx); + new->zn_last_refresh = zlp->zl_last_refresh; + uu_avl_insert(zlp->zl_avl, new, idx); } else { + zpool_refresh_stats_from_handle(node->zn_handle, zhp); + node->zn_last_refresh = zlp->zl_last_refresh; zpool_close(zhp); - free(node); + free(new); return (-1); } @@ -108,6 +114,18 @@ add_pool(zpool_handle_t *zhp, void *data) } /* + * add_pool(), but always returns 0. This allows zpool_iter() to continue + * even if a pool exists in the tree, or we fail to get the properties for + * a new one. + */ +static int +add_pool_cb(zpool_handle_t *zhp, void *data) +{ + (void) add_pool(zhp, data); + return (0); +} + +/* * Create a list of pools based on the given arguments. If we're given no * arguments, then iterate over all pools in the system and add them to the AVL * tree. Otherwise, add only those pool explicitly specified on the command @@ -135,9 +153,10 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type, zlp->zl_type = type; zlp->zl_literal = literal; + zlp->zl_last_refresh = gethrtime(); if (argc == 0) { - (void) zpool_iter(g_zfs, add_pool, zlp); + (void) zpool_iter(g_zfs, add_pool_cb, zlp); zlp->zl_findall = B_TRUE; } else { int i; @@ -159,15 +178,61 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type, } /* - * Search for any new pools, adding them to the list. We only add pools when no - * options were given on the command line. Otherwise, we keep the list fixed as - * those that were explicitly specified. + * Refresh the state of all pools on the list. Additionally, if no options were + * given on the command line, add any new pools and remove any that are no + * longer available. */ -void -pool_list_update(zpool_list_t *zlp) +int +pool_list_refresh(zpool_list_t *zlp) { - if (zlp->zl_findall) - (void) zpool_iter(g_zfs, add_pool, zlp); + zlp->zl_last_refresh = gethrtime(); + + if (!zlp->zl_findall) { + /* + * This list is a fixed list of pools, so we must not add + * or remove any. Just walk over them and refresh their + * state. + */ + int navail = 0; + for (zpool_node_t *node = uu_avl_first(zlp->zl_avl); + node != NULL; node = uu_avl_next(zlp->zl_avl, node)) { + boolean_t missing; + zpool_refresh_stats(node->zn_handle, &missing); + navail += !missing; + node->zn_last_refresh = zlp->zl_last_refresh; + } + return (navail); + } + + /* Search for any new pools and add them to the list. */ + (void) zpool_iter(g_zfs, add_pool_cb, zlp); + + /* Walk the list of existing pools, and update or remove them. */ + zpool_node_t *node, *next; + for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next) { + next = uu_avl_next(zlp->zl_avl, node); + + /* + * Skip any that were refreshed and are online; they were added + * by zpool_iter() and are already up to date. + */ + if (node->zn_last_refresh == zlp->zl_last_refresh && + zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL) + continue; + + /* Refresh and remove if necessary. */ + boolean_t missing; + zpool_refresh_stats(node->zn_handle, &missing); + if (missing) { + uu_avl_remove(zlp->zl_avl, node); + zpool_close(node->zn_handle); + free(node); + } else { + node->zn_last_refresh = zlp->zl_last_refresh; + } + } + + return (uu_avl_numnodes(zlp->zl_avl)); } /* @@ -191,23 +256,6 @@ pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, } /* - * Remove the given pool from the list. When running iostat, we want to remove - * those pools that no longer exist. - */ -void -pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) -{ - zpool_node_t search, *node; - - search.zn_handle = zhp; - if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { - uu_avl_remove(zlp->zl_avl, node); - zpool_close(node->zn_handle); - free(node); - } -} - -/* * Free all the handles associated with this list. */ void diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 237e558da65b..1feec55c0e8b 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -33,7 +33,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com> * Copyright (c) 2021, Colm Buckley <colm@tuatha.org> - * Copyright (c) 2021, 2023, Klara Inc. + * Copyright (c) 2021, 2023, 2025, Klara, Inc. * Copyright (c) 2021, 2025 Hewlett Packard Enterprise Development LP. */ @@ -456,7 +456,7 @@ get_usage(zpool_help_t idx) "<pool> <vdev> ...\n")); case HELP_ATTACH: return (gettext("\tattach [-fsw] [-o property=value] " - "<pool> <device> <new-device>\n")); + "<pool> <vdev> <new-device>\n")); case HELP_CLEAR: return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n")); case HELP_CREATE: @@ -5761,24 +5761,6 @@ children: return (ret); } -static int -refresh_iostat(zpool_handle_t *zhp, void *data) -{ - iostat_cbdata_t *cb = data; - boolean_t missing; - - /* - * If the pool has disappeared, remove it from the list and continue. - */ - if (zpool_refresh_stats(zhp, &missing) != 0) - return (-1); - - if (missing) - pool_list_remove(cb->cb_list, zhp); - - return (0); -} - /* * Callback to print out the iostats for the given pool. */ @@ -6359,15 +6341,14 @@ get_namewidth_iostat(zpool_handle_t *zhp, void *data) * This command can be tricky because we want to be able to deal with pool * creation/destruction as well as vdev configuration changes. The bulk of this * processing is handled by the pool_list_* routines in zpool_iter.c. We rely - * on pool_list_update() to detect the addition of new pools. Configuration - * changes are all handled within libzfs. + * on pool_list_refresh() to detect the addition and removal of pools. + * Configuration changes are all handled within libzfs. */ int zpool_do_iostat(int argc, char **argv) { int c; int ret; - int npools; float interval = 0; unsigned long count = 0; zpool_list_t *list; @@ -6618,10 +6599,24 @@ zpool_do_iostat(int argc, char **argv) return (1); } + int last_npools = 0; for (;;) { - if ((npools = pool_list_count(list)) == 0) + /* + * Refresh all pools in list, adding or removing pools as + * necessary. + */ + int npools = pool_list_refresh(list); + if (npools == 0) { (void) fprintf(stderr, gettext("no pools available\n")); - else { + } else { + /* + * If the list of pools has changed since last time + * around, reset the iteration count to force the + * header to be redisplayed. + */ + if (last_npools != npools) + cb.cb_iteration = 0; + /* * If this is the first iteration and -y was supplied * we skip any printing. @@ -6630,15 +6625,6 @@ zpool_do_iostat(int argc, char **argv) cb.cb_iteration == 0); /* - * Refresh all statistics. This is done as an - * explicit step before calculating the maximum name - * width, so that any * configuration changes are - * properly accounted for. - */ - (void) pool_list_iter(list, B_FALSE, refresh_iostat, - &cb); - - /* * Iterate over all pools to determine the maximum width * for the pool / device name column across all pools. */ @@ -6691,6 +6677,7 @@ zpool_do_iostat(int argc, char **argv) if (skip) { (void) fflush(stdout); (void) fsleep(interval); + last_npools = npools; continue; } @@ -6728,6 +6715,8 @@ zpool_do_iostat(int argc, char **argv) (void) fflush(stdout); (void) fsleep(interval); + + last_npools = npools; } pool_list_free(list); @@ -7644,7 +7633,7 @@ zpool_do_replace(int argc, char **argv) } /* - * zpool attach [-fsw] [-o property=value] <pool> <device>|<vdev> <new_device> + * zpool attach [-fsw] [-o property=value] <pool> <vdev> <new_device> * * -f Force attach, even if <new_device> appears to be in use. * -s Use sequential instead of healing reconstruction for resilver. @@ -7652,9 +7641,9 @@ zpool_do_replace(int argc, char **argv) * -w Wait for resilvering (mirror) or expansion (raidz) to complete * before returning. * - * Attach <new_device> to a <device> or <vdev>, where the vdev can be of type - * mirror or raidz. If <device> is not part of a mirror, then <device> will - * be transformed into a mirror of <device> and <new_device>. When a mirror + * Attach <new_device> to a <vdev>, where the vdev can be of type + * device, mirror or raidz. If <vdev> is not part of a mirror, then <vdev> will + * be transformed into a mirror of <vdev> and <new_device>. When a mirror * is involved, <new_device> will begin life with a DTL of [0, now], and will * immediately begin to resilver itself. For the raidz case, a expansion will * commence and reflow the raidz data across all the disks including the diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_util.h b/sys/contrib/openzfs/cmd/zpool/zpool_util.h index 5ab7cb9750f1..3af23c52bd45 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_util.h +++ b/sys/contrib/openzfs/cmd/zpool/zpool_util.h @@ -76,11 +76,10 @@ typedef struct zpool_list zpool_list_t; zpool_list_t *pool_list_get(int, char **, zprop_list_t **, zfs_type_t, boolean_t, int *); -void pool_list_update(zpool_list_t *); +int pool_list_refresh(zpool_list_t *); int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); void pool_list_free(zpool_list_t *); int pool_list_count(zpool_list_t *); -void pool_list_remove(zpool_list_t *, zpool_handle_t *); extern libzfs_handle_t *g_zfs; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c index 684b46a2d673..088c0108e911 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c @@ -609,22 +609,28 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) ZPOOL_CONFIG_PATH, &path) == 0); /* + * Skip active spares they should never cause + * the pool to be evaluated as inconsistent. + */ + if (is_spare(NULL, path)) + continue; + + /* * If we have a raidz/mirror that combines disks - * with files, report it as an error. + * with files, only report it as an error when + * fatal is set to ensure all the replication + * checks aren't skipped in check_replication(). */ - if (!dontreport && type != NULL && + if (fatal && !dontreport && type != NULL && strcmp(type, childtype) != 0) { if (ret != NULL) free(ret); ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication " - "level: %s contains both " - "files and devices\n"), - rep.zprl_type); - else - return (NULL); + vdev_error(gettext( + "mismatched replication " + "level: %s contains both " + "files and devices\n"), + rep.zprl_type); dontreport = B_TRUE; } diff --git a/sys/contrib/openzfs/cmd/zstream/Makefile.am b/sys/contrib/openzfs/cmd/zstream/Makefile.am index be3539fe905d..80ef1ea7ca11 100644 --- a/sys/contrib/openzfs/cmd/zstream/Makefile.am +++ b/sys/contrib/openzfs/cmd/zstream/Makefile.am @@ -18,6 +18,7 @@ zstream_LDADD = \ libzpool.la \ libnvpair.la -PHONY += install-exec-hook -install-exec-hook: +cmd-zstream-install-exec-hook: cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump + +INSTALL_EXEC_HOOKS += cmd-zstream-install-exec-hook diff --git a/sys/contrib/openzfs/config/Shellcheck.am b/sys/contrib/openzfs/config/Shellcheck.am index 1ab13516066c..87e6494056cf 100644 --- a/sys/contrib/openzfs/config/Shellcheck.am +++ b/sys/contrib/openzfs/config/Shellcheck.am @@ -16,10 +16,14 @@ SHELLCHECK_OPTS = $(call JUST_SHELLCHECK_OPTS,$(1)) $(call JUST_CHECKBAS PHONY += shellcheck +shellcheck_verbose = $(shellcheck_verbose_@AM_V@) +shellcheck_verbose_ = $(shellcheck_verbose_@AM_DEFAULT_V@) +shellcheck_verbose_0 = @echo SHELLCHECK $(_STGT); + _STGT = $(subst ^,/,$(subst shellcheck-here-,,$@)) shellcheck-here-%: if HAVE_SHELLCHECK - shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC2317,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)" + $(shellcheck_verbose)shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC2317,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)" else @echo "skipping shellcheck of" $(_STGT) "because shellcheck is not installed" endif @@ -29,11 +33,15 @@ shellcheck: $(SHELLCHECKSCRIPTS) $(call JUST_SHELLCHECK_OPTS,$(SHELLCHECKSCRIPTS PHONY += checkbashisms +checkbashisms_verbose = $(checkbashisms_verbose_@AM_V@) +checkbashisms_verbose_ = $(checkbashisms_verbose_@AM_DEFAULT_V@) +checkbashisms_verbose_0 = @echo CHECKBASHISMS $(_BTGT); + # command -v *is* specified by POSIX and every shell in existence supports it _BTGT = $(subst ^,/,$(subst checkbashisms-here-,,$@)) checkbashisms-here-%: if HAVE_CHECKBASHISMS - ! { [ -n "$(SHELLCHECK_SHELL)" ] && echo '#!/bin/$(SHELLCHECK_SHELL)'; cat "$$([ -e "$(_BTGT)" ] || echo "$(srcdir)/")$(_BTGT)"; } | \ + $(checkbashisms_verbose)! { [ -n "$(SHELLCHECK_SHELL)" ] && echo '#!/bin/$(SHELLCHECK_SHELL)'; cat "$$([ -e "$(_BTGT)" ] || echo "$(srcdir)/")$(_BTGT)"; } | \ checkbashisms -npx 2>&1 | grep -vFe "'command' with option other than -p" -e 'command -v' -e 'any possible bashisms' $(CHECKBASHISMS_IGNORE) >&2 else @echo "skipping checkbashisms of" $(_BTGT) "because checkbashisms is not installed" diff --git a/sys/contrib/openzfs/config/always-arch.m4 b/sys/contrib/openzfs/config/always-arch.m4 index 9f413eeddf95..d73b878916cb 100644 --- a/sys/contrib/openzfs/config/always-arch.m4 +++ b/sys/contrib/openzfs/config/always-arch.m4 @@ -34,8 +34,26 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [ esac AM_CONDITIONAL([TARGET_CPU_AARCH64], test $TARGET_CPU = aarch64) + AM_CONDITIONAL([TARGET_CPU_I386], test $TARGET_CPU = i386) AM_CONDITIONAL([TARGET_CPU_X86_64], test $TARGET_CPU = x86_64) AM_CONDITIONAL([TARGET_CPU_POWERPC], test $TARGET_CPU = powerpc) AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64) AM_CONDITIONAL([TARGET_CPU_ARM], test $TARGET_CPU = arm) ]) +dnl # +dnl # Check for conflicting environment variables +dnl # +dnl # If ARCH env variable is set up, then kernel Makefile in the /usr/src/kernel +dnl # can misbehave during the zfs ./configure test of the module compilation. +AC_DEFUN([ZFS_AC_CONFIG_CHECK_ARCH_VAR], [ + AC_MSG_CHECKING([for conflicting environment variables]) + if test -n "$ARCH"; then + AC_MSG_RESULT([warning]) + AC_MSG_WARN(m4_normalize([ARCH environment variable is set to "$ARCH". + This can cause build kernel modules support check failure. + Please unset it.])) + else + AC_MSG_RESULT([done]) + fi +]) + diff --git a/sys/contrib/openzfs/config/always-compiler-options.m4 b/sys/contrib/openzfs/config/always-compiler-options.m4 index 6383b12506ee..0e96435e3713 100644 --- a/sys/contrib/openzfs/config/always-compiler-options.m4 +++ b/sys/contrib/openzfs/config/always-compiler-options.m4 @@ -156,6 +156,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [ ]) dnl # +dnl # Check if kernel cc supports -Wno-format-zero-length option. +dnl # +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH], [ + saved_cc="$CC" + AS_IF( + [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ], + [ test -n "$KERNEL_LLVM" ], [ CC="clang" ], + [ CC="gcc" ] + ) + AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length]) + + saved_flags="$CFLAGS" + CFLAGS="$CFLAGS -Werror -Wno-format-zero-length" + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ + KERNEL_NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length + AC_MSG_RESULT([yes]) + ], [ + KERNEL_NO_FORMAT_ZERO_LENGTH= + AC_MSG_RESULT([no]) + ]) + + CC="$saved_cc" + CFLAGS="$saved_flags" + AC_SUBST([KERNEL_NO_FORMAT_ZERO_LENGTH]) +]) + +dnl # dnl # Check if cc supports -Wno-clobbered option. dnl # dnl # We actually invoke it with the -Wclobbered option @@ -182,6 +210,27 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED], [ ]) dnl # +dnl # Check if cc supports -Wno-atomic-alignment option. +dnl # +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_ATOMIC_ALIGNMENT], [ + AC_MSG_CHECKING([whether $CC supports -Wno-atomic-alignment]) + + saved_flags="$CFLAGS" + CFLAGS="$CFLAGS -Werror -Wno-atomic-alignment" + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ + NO_ATOMIC_ALIGNMENT=-Wno-atomic-alignment + AC_MSG_RESULT([yes]) + ], [ + NO_ATOMIC_ALIGNMENT= + AC_MSG_RESULT([no]) + ]) + + CFLAGS="$saved_flags" + AC_SUBST([NO_ATOMIC_ALIGNMENT]) +]) + +dnl # dnl # Check if cc supports -Wimplicit-fallthrough option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [ @@ -231,20 +280,17 @@ dnl # dnl # Check if kernel cc supports -Winfinite-recursion option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION], [ - AC_MSG_CHECKING([whether $KERNEL_CC supports -Winfinite-recursion]) - saved_cc="$CC" + AS_IF( + [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ], + [ test -n "$KERNEL_LLVM" ], [ CC="clang" ], + [ CC="gcc" ] + ) + AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion]) + saved_flags="$CFLAGS" - CC="gcc" CFLAGS="$CFLAGS -Werror -Winfinite-recursion" - AS_IF([ test -n "$KERNEL_CC" ], [ - CC="$KERNEL_CC" - ]) - AS_IF([ test -n "$KERNEL_LLVM" ], [ - CC="clang" - ]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ KERNEL_INFINITE_RECURSION=-Winfinite-recursion AC_DEFINE([HAVE_KERNEL_INFINITE_RECURSION], 1, @@ -329,20 +375,17 @@ dnl # dnl # Check if kernel cc supports -fno-ipa-sra option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [ - AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra]) - saved_cc="$CC" + AS_IF( + [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ], + [ test -n "$KERNEL_LLVM" ], [ CC="clang" ], + [ CC="gcc" ] + ) + AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra]) + saved_flags="$CFLAGS" - CC="gcc" CFLAGS="$CFLAGS -Werror -fno-ipa-sra" - AS_IF([ test -n "$KERNEL_CC" ], [ - CC="$KERNEL_CC" - ]) - AS_IF([ test -n "$KERNEL_LLVM" ], [ - CC="clang" - ]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ KERNEL_NO_IPA_SRA=-fno-ipa-sra AC_MSG_RESULT([yes]) diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4 index 83190c6fbe3f..02011bf39fb2 100644 --- a/sys/contrib/openzfs/config/kernel-blkdev.m4 +++ b/sys/contrib/openzfs/config/kernel-blkdev.m4 @@ -29,9 +29,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [ const char *path = "path"; fmode_t mode = 0; void *holder = NULL; - struct blk_holder_ops h; - bdev = blkdev_get_by_path(path, mode, holder, &h); + bdev = blkdev_get_by_path(path, mode, holder, NULL); ]) ]) @@ -48,9 +47,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [ const char *path = "path"; fmode_t mode = 0; void *holder = NULL; - struct blk_holder_ops h; - bdh = bdev_open_by_path(path, mode, holder, &h); + bdh = bdev_open_by_path(path, mode, holder, NULL); ]) ]) @@ -68,9 +66,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH], [ const char *path = "path"; fmode_t mode = 0; void *holder = NULL; - struct blk_holder_ops h; - file = bdev_file_open_by_path(path, mode, holder, &h); + file = bdev_file_open_by_path(path, mode, holder, NULL); ]) ]) diff --git a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 b/sys/contrib/openzfs/config/kernel-dentry-operations.m4 index aa5a9f2aff39..ce0e6e5be959 100644 --- a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 +++ b/sys/contrib/openzfs/config/kernel-dentry-operations.m4 @@ -24,6 +24,9 @@ dnl # dnl # 2.6.38 API change dnl # Added d_set_d_op() helper function. dnl # +dnl # 6.17 API change +dnl # d_set_d_op() removed. No direct replacement. +dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [ ZFS_LINUX_TEST_SRC([d_set_d_op], [ #include <linux/dcache.h> @@ -34,22 +37,46 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [ AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [ AC_MSG_CHECKING([whether d_set_d_op() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op], - [d_set_d_op], [fs/dcache.c], [ + ZFS_LINUX_TEST_RESULT([d_set_d_op], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_SET_D_OP, 1, + [Define if d_set_d_op() is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 6.17 API change +dnl # sb->s_d_op removed; set_default_d_op(sb, dop) added +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_DEFAULT_D_OP], [ + ZFS_LINUX_TEST_SRC([set_default_d_op], [ + #include <linux/dcache.h> + ], [ + set_default_d_op(NULL, NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SET_DEFAULT_D_OP], [ + AC_MSG_CHECKING([whether set_default_d_op() is available]) + ZFS_LINUX_TEST_RESULT([set_default_d_op], [ AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SET_DEFAULT_D_OP, 1, + [Define if set_default_d_op() is available]) ], [ - ZFS_LINUX_TEST_ERROR([d_set_d_op]) + AC_MSG_RESULT(no) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [ ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS ZFS_AC_KERNEL_SRC_D_SET_D_OP - ZFS_AC_KERNEL_SRC_S_D_OP + ZFS_AC_KERNEL_SRC_SET_DEFAULT_D_OP ]) AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [ ZFS_AC_KERNEL_D_OBTAIN_ALIAS ZFS_AC_KERNEL_D_SET_D_OP - ZFS_AC_KERNEL_S_D_OP + ZFS_AC_KERNEL_SET_DEFAULT_D_OP ]) diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index e3e7625db7d8..35819e4d68c5 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -70,6 +70,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_COMMIT_METADATA ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED + ZFS_AC_KERNEL_SRC_DENTRY ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT @@ -188,6 +189,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_COMMIT_METADATA ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED + ZFS_AC_KERNEL_DENTRY ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT diff --git a/sys/contrib/openzfs/config/user-statx.m4 b/sys/contrib/openzfs/config/user-statx.m4 index 0315f93e0c20..1ba74a40e9b8 100644 --- a/sys/contrib/openzfs/config/user-statx.m4 +++ b/sys/contrib/openzfs/config/user-statx.m4 @@ -2,7 +2,7 @@ dnl # dnl # Check for statx() function and STATX_MNT_ID availability dnl # AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [ - AC_CHECK_HEADERS([linux/stat.h], + AC_CHECK_HEADERS([sys/stat.h], [have_stat_headers=yes], [have_stat_headers=no]) @@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [ AC_MSG_CHECKING([for STATX_MNT_ID]) AC_COMPILE_IFELSE([ AC_LANG_PROGRAM([[ - #include <linux/stat.h> + #include <sys/stat.h> ]], [[ struct statx stx; int mask = STATX_MNT_ID; @@ -29,6 +29,6 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [ ]) ]) ], [ - AC_MSG_WARN([linux/stat.h not found; skipping statx support]) + AC_MSG_WARN([sys/stat.h not found; skipping statx support]) ]) ]) dnl end AC_DEFUN diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4 index 7cf1b02d8757..161d390466db 100644 --- a/sys/contrib/openzfs/config/zfs-build.m4 +++ b/sys/contrib/openzfs/config/zfs-build.m4 @@ -252,10 +252,12 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION + ZFS_AC_CONFIG_ALWAYS_CC_NO_ATOMIC_ALIGNMENT ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH + ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA @@ -265,6 +267,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD ZFS_AC_CONFIG_ALWAYS_SYSTEM ZFS_AC_CONFIG_ALWAYS_ARCH + ZFS_AC_CONFIG_CHECK_ARCH_VAR ZFS_AC_CONFIG_ALWAYS_PYTHON ZFS_AC_CONFIG_ALWAYS_PYZFS ZFS_AC_CONFIG_ALWAYS_SED diff --git a/sys/contrib/openzfs/contrib/debian/copyright b/sys/contrib/openzfs/contrib/debian/copyright index 65c7d209d8eb..006f32fdf924 100644 --- a/sys/contrib/openzfs/contrib/debian/copyright +++ b/sys/contrib/openzfs/contrib/debian/copyright @@ -4,7 +4,7 @@ The detailed contributor information can be found in [2][3]. Files: contrib/debian/* Copyright: - 2013-2016, Aron Xu <aron@debian.org> + 2013-2025, Aron Xu <aron@debian.org> 2016, Petter Reinholdtsen <pere@hungry.com> 2013, Carlos Alberto Lopez Perez <clopez@igalia.com> 2013, Turbo Fredriksson <turbo@bayour.com> @@ -12,6 +12,8 @@ Copyright: 2011-2013, Darik Horn <dajhorn@vanadac.com> 2018-2019, Mo Zhou <cdluminate@gmail.com> 2018-2020, Mo Zhou <lumin@debian.org> + 2023-2024, Shengqi Chen <harry-chen@outlook.com> + 2024-2025, Shengqi Chen <harry@debian.org> License: GPL-2+ [1] https://tracker.debian.org/pkg/zfs-linux diff --git a/sys/contrib/openzfs/contrib/debian/not-installed b/sys/contrib/openzfs/contrib/debian/not-installed index 88557f76fcae..9c08da5a6a7b 100644 --- a/sys/contrib/openzfs/contrib/debian/not-installed +++ b/sys/contrib/openzfs/contrib/debian/not-installed @@ -1,4 +1,4 @@ -usr/bin/arc_summary.py +usr/bin/zarcsummary.py usr/share/zfs/zfs-helpers.sh etc/default/zfs etc/init.d @@ -9,4 +9,4 @@ etc/zfs/vdev_id.conf.sas_direct.example etc/zfs/vdev_id.conf.sas_switch.example etc/zfs/vdev_id.conf.scsi.example etc/zfs/zfs-functions -lib/systemd/system/zfs-import.service +usr/lib/systemd/system/zfs-import.service diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in index ed7b541e3607..fce542270dd8 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in @@ -1 +1 @@ -lib/@DEB_HOST_MULTIARCH@/libnvpair.so.* +usr/lib/@DEB_HOST_MULTIARCH@/libnvpair.so.* diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install index c33123f69a8d..bafdebe9bb91 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install @@ -1,2 +1,2 @@ -lib/*/security/pam_zfs_key.so +usr/lib/*/security/pam_zfs_key.so usr/share/pam-configs/zfs_key diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst index 03893454eee9..db4db73d6d5a 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst @@ -1,7 +1,7 @@ #!/bin/sh set -e -if ! $(ldd "/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then +if ! $(ldd "/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then pam-auth-update --package fi diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in index a197d030d743..bb33386791e1 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in @@ -1 +1 @@ -lib/@DEB_HOST_MULTIARCH@/libuutil.so.* +usr/lib/@DEB_HOST_MULTIARCH@/libuutil.so.* diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in index eaa8c3925e24..5673e2661c6a 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in @@ -1,3 +1,5 @@ -lib/@DEB_HOST_MULTIARCH@/*.a usr/lib/@DEB_HOST_MULTIARCH@ +usr/lib/@DEB_HOST_MULTIARCH@/*.a +usr/lib/@DEB_HOST_MULTIARCH@/*.so +usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig usr/include -usr/lib/@DEB_HOST_MULTIARCH@ + diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in index 6765aaee59cc..a9054c14cc73 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in @@ -1,2 +1,2 @@ -lib/@DEB_HOST_MULTIARCH@/libzfs.so.* -lib/@DEB_HOST_MULTIARCH@/libzfs_core.so.* +usr/lib/@DEB_HOST_MULTIARCH@/libzfs.so.* +usr/lib/@DEB_HOST_MULTIARCH@/libzfs_core.so.* diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in index 49216742433f..b61b8ab63265 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in @@ -1 +1 @@ -lib/@DEB_HOST_MULTIARCH@/libzfsbootenv.so.* +usr/lib/@DEB_HOST_MULTIARCH@/libzfsbootenv.so.* diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in index b9e872df9ba8..0e087a2709b3 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in @@ -1 +1 @@ -lib/@DEB_HOST_MULTIARCH@/libzpool.so.* +usr/lib/@DEB_HOST_MULTIARCH@/libzpool.so.* diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install index b3afef50dbd4..496cab2ad5e4 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install @@ -1,4 +1,4 @@ -sbin/ztest +usr/sbin/ztest usr/bin/raidz_test usr/share/man/man1/raidz_test.1 usr/share/man/man1/test-runner.1 diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install index a348ba828ee5..30699a8a98da 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install @@ -1,5 +1,5 @@ etc/zfs/zed.d/* -lib/systemd/system/zfs-zed.service +usr/lib/systemd/system/zfs-zed.service usr/lib/zfs-linux/zed.d/* usr/sbin/zed usr/share/man/man8/zed.8 diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install index 37284a78ad18..6810108f2c5d 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install @@ -1,48 +1,48 @@ etc/default/zfs etc/zfs/zfs-functions etc/zfs/zpool.d/ -lib/systemd/system-generators/ -lib/systemd/system-preset/ -lib/systemd/system/zfs-import-cache.service -lib/systemd/system/zfs-import-scan.service -lib/systemd/system/zfs-import.target -lib/systemd/system/zfs-load-key.service -lib/systemd/system/zfs-mount.service -lib/systemd/system/zfs-mount@.service -lib/systemd/system/zfs-scrub-monthly@.timer -lib/systemd/system/zfs-scrub-weekly@.timer -lib/systemd/system/zfs-scrub@.service -lib/systemd/system/zfs-trim-monthly@.timer -lib/systemd/system/zfs-trim-weekly@.timer -lib/systemd/system/zfs-trim@.service -lib/systemd/system/zfs-share.service -lib/systemd/system/zfs-volume-wait.service -lib/systemd/system/zfs-volumes.target -lib/systemd/system/zfs.target -lib/udev/ -sbin/fsck.zfs -sbin/mount.zfs -sbin/zdb -sbin/zfs -sbin/zfs_ids_to_path -sbin/zgenhostid -sbin/zhack -sbin/zinject -sbin/zpool -sbin/zstream -sbin/zstreamdump +usr/lib/systemd/system-generators/ +usr/lib/systemd/system-preset/ +usr/lib/systemd/system/zfs-import-cache.service +usr/lib/systemd/system/zfs-import-scan.service +usr/lib/systemd/system/zfs-import.target +usr/lib/systemd/system/zfs-load-key.service +usr/lib/systemd/system/zfs-mount.service +usr/lib/systemd/system/zfs-mount@.service +usr/lib/systemd/system/zfs-scrub-monthly@.timer +usr/lib/systemd/system/zfs-scrub-weekly@.timer +usr/lib/systemd/system/zfs-scrub@.service +usr/lib/systemd/system/zfs-trim-monthly@.timer +usr/lib/systemd/system/zfs-trim-weekly@.timer +usr/lib/systemd/system/zfs-trim@.service +usr/lib/systemd/system/zfs-share.service +usr/lib/systemd/system/zfs-volume-wait.service +usr/lib/systemd/system/zfs-volumes.target +usr/lib/systemd/system/zfs.target +usr/lib/udev/ +usr/sbin/fsck.zfs +usr/sbin/mount.zfs +usr/sbin/zdb +usr/sbin/zfs +usr/sbin/zfs_ids_to_path +usr/sbin/zgenhostid +usr/sbin/zhack +usr/sbin/zinject +usr/sbin/zpool +usr/sbin/zstream +usr/sbin/zstreamdump usr/bin/zvol_wait -usr/lib/modules-load.d/ lib/ +usr/lib/modules-load.d/ usr/lib/zfs-linux/zpool.d/ usr/lib/zfs-linux/zpool_influxdb usr/lib/zfs-linux/zfs_prepare_disk -usr/sbin/arc_summary -usr/sbin/arcstat -usr/sbin/dbufstat -usr/sbin/zilstat +usr/bin/zarcsummary +usr/bin/zarcstat +usr/bin/dbufstat usr/sbin +usr/bin/zilstat usr/share/zfs/compatibility.d/ usr/share/bash-completion/completions -usr/share/man/man1/arcstat.1 +usr/share/man/man1/zarcstat.1 usr/share/man/man1/zhack.1 usr/share/man/man1/zvol_wait.1 usr/share/man/man5/ diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links new file mode 100644 index 000000000000..54099e6573b0 --- /dev/null +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links @@ -0,0 +1,3 @@ +usr/sbin/zfs usr/bin/zfs +usr/sbin/zpool usr/bin/zpool +usr/lib/zfs-linux/zpool_influxdb usr/bin/zpool_influxdb diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in index 3226d604546c..5087a7e18e16 100755 --- a/sys/contrib/openzfs/contrib/debian/rules.in +++ b/sys/contrib/openzfs/contrib/debian/rules.in @@ -37,18 +37,19 @@ override_dh_auto_configure: @# Build the userland, but don't build the kernel modules. dh_auto_configure -- @CFGOPTS@ \ --bindir=/usr/bin \ - --sbindir=/sbin \ - --libdir=/lib/"$(DEB_HOST_MULTIARCH)" \ - --with-udevdir=/lib/udev \ + --sbindir=/usr/sbin \ + --with-mounthelperdir=/usr/sbin \ + --libdir=/usr/lib/"$(DEB_HOST_MULTIARCH)" \ + --with-udevdir=/usr/lib/udev \ --with-zfsexecdir=/usr/lib/zfs-linux \ --enable-systemd \ --enable-pyzfs \ --with-python=python3 \ - --with-pammoduledir='/lib/$(DEB_HOST_MULTIARCH)/security' \ + --with-pammoduledir='/usr/lib/$(DEB_HOST_MULTIARCH)/security' \ --with-pkgconfigdir='/usr/lib/$(DEB_HOST_MULTIARCH)/pkgconfig' \ - --with-systemdunitdir=/lib/systemd/system \ - --with-systemdpresetdir=/lib/systemd/system-preset \ - --with-systemdgeneratordir=/lib/systemd/system-generators \ + --with-systemdunitdir=/usr/lib/systemd/system \ + --with-systemdpresetdir=/usr/lib/systemd/system-preset \ + --with-systemdgeneratordir=/usr/lib/systemd/system-generators \ --with-config=user for i in $(wildcard $(CURDIR)/debian/*.install.in) ; do \ @@ -77,23 +78,10 @@ override_dh_auto_install: @# Install the utilities. $(MAKE) install DESTDIR='$(CURDIR)/debian/tmp' - # Move from bin_dir to /usr/sbin - # Remove suffix (.py) as per policy 10.4 - Scripts - # https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts - mkdir -p '$(CURDIR)/debian/tmp/usr/sbin/' - mv '$(CURDIR)/debian/tmp/usr/bin/arc_summary' '$(CURDIR)/debian/tmp/usr/sbin/arc_summary' - mv '$(CURDIR)/debian/tmp/usr/bin/arcstat' '$(CURDIR)/debian/tmp/usr/sbin/arcstat' - mv '$(CURDIR)/debian/tmp/usr/bin/dbufstat' '$(CURDIR)/debian/tmp/usr/sbin/dbufstat' - mv '$(CURDIR)/debian/tmp/usr/bin/zilstat' '$(CURDIR)/debian/tmp/usr/sbin/zilstat' - - @# Zed has dependencies outside of the system root. - mv '$(CURDIR)/debian/tmp/sbin/zed' '$(CURDIR)/debian/tmp/usr/sbin/zed' - sed -i 's|ExecStart=/sbin/|ExecStart=/usr/sbin/|g' '$(CURDIR)/debian/tmp/lib/systemd/system/zfs-zed.service' - @# Install the DKMS source. @# We only want the files needed to build the modules install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \ - '$(CURDIR)/scripts/dkms.postbuild' + '$(CURDIR)/scripts/dkms.postbuild' '$(CURDIR)/scripts/objtool-wrapper.in' $(foreach file,$(DKMSFILES),mv '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/$(file)' '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)' || exit 1;) @# Only ever build Linux modules @@ -108,8 +96,8 @@ override_dh_auto_install: @# - zfs.release$ @# * Takes care of spaces and tabs @# * Remove reference to ZFS_AC_PACKAGE - awk '/^AC_CONFIG_FILES\(\[/,/^\]\)/ {\ - if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))/) \ + awk '/^AC_CONFIG_FILES\(\[/,/\]\)/ {\ + if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))|scripts\/objtool-wrapper.*\]\)$$/) \ {next} } {print}' \ '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' | sed '/ZFS_AC_PACKAGE/d' > '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' @# Set "SUBDIRS = module include" for CONFIG_KERNEL and remove SUBDIRS for all other configs. @@ -131,11 +119,6 @@ override_dh_auto_install: cd '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)'; ./autogen.sh rm -fr '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/autom4te.cache' - for i in `ls $(CURDIR)/debian/tmp/lib/$(DEB_HOST_MULTIARCH)/*.so`; do \ - ln -s '/lib/$(DEB_HOST_MULTIARCH)/'`readlink $${i}` '$(CURDIR)/debian/tmp/usr/lib/$(DEB_HOST_MULTIARCH)/'`basename $${i}`; \ - rm $${i}; \ - done - chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions' chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs' @@ -159,7 +142,7 @@ override_dh_auto_clean: @if test -e META.orig; then mv META.orig META; fi override_dh_install: - find debian/tmp/lib -name '*.la' -delete + find debian/tmp/usr/lib -name '*.la' -delete dh_install override_dh_missing: @@ -173,8 +156,8 @@ override_dh_installinit: dh_installinit -R --name zfs-zed override_dh_installsystemd: - mkdir -p debian/openzfs-zfsutils/lib/systemd/system - ln -sr /dev/null debian/openzfs-zfsutils/lib/systemd/system/zfs-import.service + mkdir -p debian/openzfs-zfsutils/usr/lib/systemd/system + ln -sr /dev/null debian/openzfs-zfsutils/usr/lib/systemd/system/zfs-import.service dh_installsystemd --no-stop-on-upgrade -X zfs-zed.service dh_installsystemd --name zfs-zed diff --git a/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev b/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev index 0cf21a4211a8..d4f968aed8f2 100755 --- a/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev +++ b/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev @@ -5,7 +5,7 @@ PREREQ="udev" PREREQ_UDEV_RULES="60-zvol.rules 69-vdev.rules" -COPY_EXEC_LIST="/lib/udev/zvol_id /lib/udev/vdev_id" +COPY_EXEC_LIST="/usr/lib/udev/zvol_id /usr/lib/udev/vdev_id" # Generic result code. RC=0 diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in index acad468edfd1..130d94c70707 100755 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in @@ -16,7 +16,8 @@ depends() { } installkernel() { - instmods -c zfs + hostonly='' instmods -c zfs + instmods mpt3sas virtio_blk } install() { diff --git a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in index 4776087d9a76..db9bf0e20274 100644 --- a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in +++ b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in @@ -8,3 +8,12 @@ fi . /usr/share/initramfs-tools/hook-functions copy_exec /usr/share/initramfs-tools/zfsunlock /usr/bin/zfsunlock + +if [ -f /etc/initramfs-tools/etc/motd ]; then + copy_file text /etc/initramfs-tools/etc/motd /etc/motd +else + tmpf=$(mktemp) + echo "If you use zfs encrypted root filesystems, you can use \`zfsunlock\` to manually unlock it" > "$tmpf" + copy_file text "$tmpf" /etc/motd + rm -f "$tmpf" +fi diff --git a/sys/contrib/openzfs/contrib/intel_qat/readme.md b/sys/contrib/openzfs/contrib/intel_qat/readme.md index 7e45d395bb80..04c299b6404c 100644 --- a/sys/contrib/openzfs/contrib/intel_qat/readme.md +++ b/sys/contrib/openzfs/contrib/intel_qat/readme.md @@ -8,7 +8,7 @@ This contrib contains community compatibility patches to get Intel QAT working o These patches are based on the following Intel QAT version: [1.7.l.4.10.0-00014](https://01.org/sites/default/files/downloads/qat1.7.l.4.10.0-00014.tar.gz) -When using QAT with above kernels versions, the following patches needs to be applied using: +When using QAT with the above kernel versions, the following patches need to be applied using: patch -p1 < _$PATCH_ _Where $PATCH refers to the path of the patch in question_ diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c index a0bc172c6f44..88698dedabbc 100644 --- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c +++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c @@ -391,7 +391,11 @@ static int zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config, int argc, const char **argv) { +#if defined(__FreeBSD__) + config->homes_prefix = strdup("zroot/home"); +#else config->homes_prefix = strdup("rpool/home"); +#endif if (config->homes_prefix == NULL) { pam_syslog(pamh, LOG_ERR, "strdup failure"); return (PAM_SERVICE_ERR); diff --git a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py index b26a37f5de10..26d66a452726 100644 --- a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py +++ b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py @@ -604,5 +604,4 @@ class RaidzExpansionRunning(ZFSError): errno = ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS message = "A raidz device is currently expanding" - # vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4 diff --git a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py index 971aa1d0d493..bad1af2d1671 100644 --- a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py +++ b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py @@ -4223,7 +4223,7 @@ class _TempPool(object): self.getRoot().reset() return - # On the Buildbot builders this may fail with "pool is busy" + # On the CI builders this may fail with "pool is busy" # Retry 5 times before raising an error retry = 0 while True: diff --git a/sys/contrib/openzfs/etc/init.d/README.md b/sys/contrib/openzfs/etc/init.d/README.md index da780fdc1222..3852dd9a6b2e 100644 --- a/sys/contrib/openzfs/etc/init.d/README.md +++ b/sys/contrib/openzfs/etc/init.d/README.md @@ -1,5 +1,5 @@ DESCRIPTION - These script were written with the primary intention of being portable and + These scripts were written with the primary intention of being portable and usable on as many systems as possible. This is, in practice, usually not possible. But the intention is there. diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index 3fcdc176a621..14930fb90622 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -479,6 +479,8 @@ _LIBZFS_H zpool_status_t zpool_import_status(nvlist_t *, const char **, _LIBZFS_H nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); _LIBZFS_H nvlist_t *zpool_get_features(zpool_handle_t *); _LIBZFS_H int zpool_refresh_stats(zpool_handle_t *, boolean_t *); +_LIBZFS_H void zpool_refresh_stats_from_handle(zpool_handle_t *, + zpool_handle_t *); _LIBZFS_H int zpool_get_errlog(zpool_handle_t *, nvlist_t **); _LIBZFS_H void zpool_add_propname(zpool_handle_t *, const char *); diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h index 2f5fe4619ef7..14b42f2e7087 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h @@ -63,6 +63,17 @@ typedef longlong_t hrtime_t; #define NSEC_TO_TICK(nsec) ((nsec) / (NANOSEC / hz)) static __inline hrtime_t +getlrtime(void) +{ + struct timespec ts; + hrtime_t nsec; + + getnanouptime(&ts); + nsec = ((hrtime_t)ts.tv_sec * NANOSEC) + ts.tv_nsec; + return (nsec); +} + +static __inline hrtime_t gethrtime(void) { struct timespec ts; diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h index 076dab8ba6dc..214f3ea0e787 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h @@ -542,24 +542,6 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id) } #endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ -/* - * All the io_*() helper functions below can operate on a bio, or a rq, but - * not both. The older submit_bio() codepath will pass a bio, and the - * newer blk-mq codepath will pass a rq. - */ -static inline int -io_data_dir(struct bio *bio, struct request *rq) -{ - if (rq != NULL) { - if (op_is_write(req_op(rq))) { - return (WRITE); - } else { - return (READ); - } - } - return (bio_data_dir(bio)); -} - static inline int io_is_flush(struct bio *bio, struct request *rq) { diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h index 16e8a319a5f8..152e5a606f0e 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h @@ -61,32 +61,6 @@ #endif /* - * 2.6.30 API change, - * The const keyword was added to the 'struct dentry_operations' in - * the dentry structure. To handle this we define an appropriate - * dentry_operations_t typedef which can be used. - */ -typedef const struct dentry_operations dentry_operations_t; - -/* - * 2.6.38 API addition, - * Added d_clear_d_op() helper function which clears some flags and the - * registered dentry->d_op table. This is required because d_set_d_op() - * issues a warning when the dentry operations table is already set. - * For the .zfs control directory to work properly we must be able to - * override the default operations table and register custom .d_automount - * and .d_revalidate callbacks. - */ -static inline void -d_clear_d_op(struct dentry *dentry) -{ - dentry->d_op = NULL; - dentry->d_flags &= ~( - DCACHE_OP_HASH | DCACHE_OP_COMPARE | - DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE); -} - -/* * Walk and invalidate all dentry aliases of an inode * unless it's a mountpoint */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h index 563e0a19663d..c883836c2f83 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h @@ -130,7 +130,7 @@ RW_READ_HELD(krwlock_t *rwp) /* * The Linux rwsem implementation does not require a matching destroy. */ -#define rw_destroy(rwp) ((void) 0) +#define rw_destroy(rwp) ASSERT(!(RW_LOCK_HELD(rwp))) /* * Upgrading a rwsem from a reader to a writer is not supported by the diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h index 087389b57b34..ad2815e46394 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h @@ -25,6 +25,6 @@ #ifndef _SPL_STAT_H #define _SPL_STAT_H -#include <linux/stat.h> +#include <sys/stat.h> #endif /* SPL_STAT_H */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/time.h b/sys/contrib/openzfs/include/os/linux/spl/sys/time.h index 33b273b53996..4edc42a8aef9 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/time.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/time.h @@ -80,6 +80,14 @@ gethrestime_sec(void) } static inline hrtime_t +getlrtime(void) +{ + inode_timespec_t ts; + ktime_get_coarse_ts64(&ts); + return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec); +} + +static inline hrtime_t gethrtime(void) { struct timespec64 ts; diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h index f5a9105cd885..8994aab889fe 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h @@ -55,6 +55,7 @@ extern const struct file_operations zpl_dir_file_operations; extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg); extern const struct super_operations zpl_super_operations; +extern const struct dentry_operations zpl_dentry_operations; extern const struct export_operations zpl_export_operations; extern struct file_system_type zpl_fs_type; diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h index b18961be1282..aa5035862def 100644 --- a/sys/contrib/openzfs/include/sys/dmu.h +++ b/sys/contrib/openzfs/include/sys/dmu.h @@ -414,9 +414,9 @@ typedef struct dmu_buf { #define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint" #define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap" #define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones" -#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klaraystems:txg_log_time:minutes" -#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klaraystems:txg_log_time:days" -#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klaraystems:txg_log_time:months" +#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klarasystems:txg_log_time:minutes" +#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klarasystems:txg_log_time:days" +#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klarasystems:txg_log_time:months" /* * Allocate an object from this objset. The range of object numbers diff --git a/sys/contrib/openzfs/include/sys/dmu_impl.h b/sys/contrib/openzfs/include/sys/dmu_impl.h index 21a8b16a3ee6..bae872bd1907 100644 --- a/sys/contrib/openzfs/include/sys/dmu_impl.h +++ b/sys/contrib/openzfs/include/sys/dmu_impl.h @@ -168,12 +168,10 @@ extern "C" { * dn_allocated_txg * dn_free_txg * dn_assigned_txg - * dn_dirty_txg + * dn_dirtycnt * dd_assigned_tx * dn_notxholds * dn_nodnholds - * dn_dirtyctx - * dn_dirtyctx_firstset * (dn_phys copy fields?) * (dn_phys contents?) * held from: diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h index 76218c8b09ca..8bd1db5b7165 100644 --- a/sys/contrib/openzfs/include/sys/dnode.h +++ b/sys/contrib/openzfs/include/sys/dnode.h @@ -141,12 +141,6 @@ struct dmu_buf_impl; struct objset; struct zio; -enum dnode_dirtycontext { - DN_UNDIRTIED, - DN_DIRTY_OPEN, - DN_DIRTY_SYNC -}; - /* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */ #define DNODE_FLAG_USED_BYTES (1 << 0) #define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1) @@ -340,11 +334,9 @@ struct dnode { uint64_t dn_allocated_txg; uint64_t dn_free_txg; uint64_t dn_assigned_txg; - uint64_t dn_dirty_txg; /* txg dnode was last dirtied */ + uint8_t dn_dirtycnt; kcondvar_t dn_notxholds; kcondvar_t dn_nodnholds; - enum dnode_dirtycontext dn_dirtyctx; - const void *dn_dirtyctx_firstset; /* dbg: contents meaningless */ /* protected by own devices */ zfs_refcount_t dn_tx_holds; @@ -440,7 +432,6 @@ void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting); int dnode_try_claim(objset_t *os, uint64_t object, int slots); boolean_t dnode_is_dirty(dnode_t *dn); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); -void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); @@ -468,9 +459,6 @@ void dnode_free_interior_slots(dnode_t *dn); void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type); -#define DNODE_IS_DIRTY(_dn) \ - ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa)) - #define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ (((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \ diff --git a/sys/contrib/openzfs/include/sys/dsl_deleg.h b/sys/contrib/openzfs/include/sys/dsl_deleg.h index ae729b9f32ff..36dd6211219d 100644 --- a/sys/contrib/openzfs/include/sys/dsl_deleg.h +++ b/sys/contrib/openzfs/include/sys/dsl_deleg.h @@ -46,6 +46,7 @@ extern "C" { #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_SEND_RAW "send:raw" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append" #define ZFS_DELEG_PERM_ALLOW "allow" diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h index 659c64bf15a6..a771b11420fd 100644 --- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h @@ -58,6 +58,7 @@ extern "C" { #define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" #define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write" +#define FM_EREPORT_ZFS_SITOUT "sitout" #define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" #define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h index fc359c10365a..662fd81c5ee1 100644 --- a/sys/contrib/openzfs/include/sys/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fs/zfs.h @@ -385,6 +385,8 @@ typedef enum { VDEV_PROP_TRIM_SUPPORT, VDEV_PROP_TRIM_ERRORS, VDEV_PROP_SLOW_IOS, + VDEV_PROP_SIT_OUT, + VDEV_PROP_AUTOSIT, VDEV_NUM_PROPS } vdev_prop_t; @@ -746,6 +748,8 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" #define ZPOOL_CONFIG_ASHIFT "ashift" #define ZPOOL_CONFIG_ASIZE "asize" +#define ZPOOL_CONFIG_MIN_ALLOC "min_alloc" +#define ZPOOL_CONFIG_MAX_ALLOC "max_alloc" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ #define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */ @@ -1673,6 +1677,7 @@ typedef enum { ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS, ZFS_ERR_ASHIFT_MISMATCH, ZFS_ERR_STREAM_LARGE_MICROZAP, + ZFS_ERR_TOO_MANY_SITOUTS, } zfs_errno_t; /* diff --git a/sys/contrib/openzfs/include/sys/range_tree.h b/sys/contrib/openzfs/include/sys/range_tree.h index 0f6884682459..0f6def36f9f6 100644 --- a/sys/contrib/openzfs/include/sys/range_tree.h +++ b/sys/contrib/openzfs/include/sys/range_tree.h @@ -238,8 +238,7 @@ zfs_rs_set_end_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t end) } static inline void -zfs_zfs_rs_set_fill_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, - uint64_t fill) +zfs_rs_set_fill_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t fill) { ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { @@ -277,7 +276,7 @@ static inline void zfs_rs_set_fill(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t fill) { ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift)); - zfs_zfs_rs_set_fill_raw(rs, rt, fill >> rt->rt_shift); + zfs_rs_set_fill_raw(rs, rt, fill >> rt->rt_shift); } typedef void zfs_range_tree_func_t(void *arg, uint64_t start, uint64_t size); diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index 66db16b33c51..f172f2af6f07 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -1030,7 +1030,7 @@ extern void spa_import_progress_set_notes_nolog(spa_t *spa, extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag, krw_t rw); extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw); -extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, +extern void spa_config_enter_priority(spa_t *spa, int locks, const void *tag, krw_t rw); extern void spa_config_exit(spa_t *spa, int locks, const void *tag); extern int spa_config_held(spa_t *spa, int locks, krw_t rw); @@ -1084,6 +1084,7 @@ extern pool_state_t spa_state(spa_t *spa); extern spa_load_state_t spa_load_state(spa_t *spa); extern uint64_t spa_freeze_txg(spa_t *spa); extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize); +extern void spa_get_min_alloc_range(spa_t *spa, uint64_t *min, uint64_t *max); extern uint64_t spa_get_dspace(spa_t *spa); extern uint64_t spa_get_checkpoint_space(spa_t *spa); extern uint64_t spa_get_slop_space(spa_t *spa); diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h index 07a959db3447..62b062984d36 100644 --- a/sys/contrib/openzfs/include/sys/spa_impl.h +++ b/sys/contrib/openzfs/include/sys/spa_impl.h @@ -265,6 +265,7 @@ struct spa { uint64_t spa_min_ashift; /* of vdevs in normal class */ uint64_t spa_max_ashift; /* of vdevs in normal class */ uint64_t spa_min_alloc; /* of vdevs in normal class */ + uint64_t spa_max_alloc; /* of vdevs in normal class */ uint64_t spa_gcd_alloc; /* of vdevs in normal class */ uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_load_guid; /* spa_load initialized guid */ diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h index 4ab472bd6742..5a8c2f846be2 100644 --- a/sys/contrib/openzfs/include/sys/vdev_impl.h +++ b/sys/contrib/openzfs/include/sys/vdev_impl.h @@ -279,10 +279,12 @@ struct vdev { uint64_t vdev_noalloc; /* device is passivated? */ uint64_t vdev_removing; /* device is being removed? */ uint64_t vdev_failfast; /* device failfast setting */ + boolean_t vdev_autosit; /* automatic sitout management */ boolean_t vdev_rz_expanding; /* raidz is being expanded? */ boolean_t vdev_ishole; /* is a hole in the namespace */ uint64_t vdev_top_zap; vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */ + uint64_t vdev_last_latency_check; /* pool checkpoint related */ space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */ @@ -431,6 +433,10 @@ struct vdev { hrtime_t vdev_mmp_pending; /* 0 if write finished */ uint64_t vdev_mmp_kstat_id; /* to find kstat entry */ uint64_t vdev_expansion_time; /* vdev's last expansion time */ + /* used to calculate average read latency */ + uint64_t *vdev_prev_histo; + int64_t vdev_outlier_count; /* read outlier amongst peers */ + hrtime_t vdev_read_sit_out_expire; /* end of sit out period */ list_node_t vdev_leaf_node; /* leaf vdev list */ /* diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz.h b/sys/contrib/openzfs/include/sys/vdev_raidz.h index 3b02728cdbf3..df8c2aed4045 100644 --- a/sys/contrib/openzfs/include/sys/vdev_raidz.h +++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h @@ -61,6 +61,9 @@ void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *); struct raidz_row *vdev_raidz_row_alloc(int, zio_t *); void vdev_raidz_reflow_copy_scratch(spa_t *); void raidz_dtl_reassessed(vdev_t *); +boolean_t vdev_sit_out_reads(vdev_t *, zio_flag_t); +void vdev_raidz_sit_child(vdev_t *, uint64_t); +void vdev_raidz_unsit_child(vdev_t *); extern const zio_vsd_ops_t vdev_raidz_vsd_ops; diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h index debce6f09a22..8c8dcfb077f6 100644 --- a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h +++ b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h @@ -119,6 +119,7 @@ typedef struct raidz_col { uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */ uint8_t rc_force_repair:1; /* Write good data to this column */ uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */ + uint8_t rc_latency_outlier:1; /* Latency outlier for this device */ int rc_shadow_devidx; /* for double write during expansion */ int rc_shadow_error; /* for double write during expansion */ uint64_t rc_shadow_offset; /* for double write during expansion */ @@ -133,6 +134,7 @@ typedef struct raidz_row { int rr_firstdatacol; /* First data column/parity count */ abd_t *rr_abd_empty; /* dRAID empty sector buffer */ int rr_nempty; /* empty sectors included in parity */ + int rr_outlier_cnt; /* Count of latency outlier devices */ #ifdef ZFS_DEBUG uint64_t rr_offset; /* Logical offset for *_io_verify() */ uint64_t rr_size; /* Physical size for *_io_verify() */ diff --git a/sys/contrib/openzfs/include/sys/zfs_file.h b/sys/contrib/openzfs/include/sys/zfs_file.h index a1f344c2bb79..67abe9988aaa 100644 --- a/sys/contrib/openzfs/include/sys/zfs_file.h +++ b/sys/contrib/openzfs/include/sys/zfs_file.h @@ -46,7 +46,7 @@ void zfs_file_close(zfs_file_t *fp); int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid); int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off, - ssize_t *resid); + uint8_t ashift, ssize_t *resid); int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid); int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off, ssize_t *resid); diff --git a/sys/contrib/openzfs/include/sys/zfs_ioctl.h b/sys/contrib/openzfs/include/sys/zfs_ioctl.h index 8174242abdac..cfe11f43bb8e 100644 --- a/sys/contrib/openzfs/include/sys/zfs_ioctl.h +++ b/sys/contrib/openzfs/include/sys/zfs_ioctl.h @@ -455,6 +455,7 @@ typedef enum zinject_type { ZINJECT_DECRYPT_FAULT, ZINJECT_DELAY_IMPORT, ZINJECT_DELAY_EXPORT, + ZINJECT_DELAY_READY, } zinject_type_t; typedef enum zinject_iotype { diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index 353805fcb969..acb0a03a36b2 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -82,7 +82,8 @@ gbh_nblkptrs(uint64_t size) { static inline zio_eck_t * gbh_eck(zio_gbh_phys_t *gbh, uint64_t size) { ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t))); - return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - sizeof (zio_eck_t))); + return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - + sizeof (zio_eck_t))); } static inline blkptr_t * @@ -717,6 +718,7 @@ extern void zio_handle_ignored_writes(zio_t *zio); extern hrtime_t zio_handle_io_delay(zio_t *zio); extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed); extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed); +extern hrtime_t zio_handle_ready_delay(zio_t *zio); /* * Checksum ereport functions diff --git a/sys/contrib/openzfs/include/sys/zvol.h b/sys/contrib/openzfs/include/sys/zvol.h index cdc9dba2a28d..5791246e99e4 100644 --- a/sys/contrib/openzfs/include/sys/zvol.h +++ b/sys/contrib/openzfs/include/sys/zvol.h @@ -53,7 +53,7 @@ extern int zvol_set_volsize(const char *, uint64_t); extern int zvol_set_volthreading(const char *, boolean_t); extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); extern int zvol_set_ro(const char *, boolean_t); -extern zvol_state_handle_t *zvol_suspend(const char *); +extern int zvol_suspend(const char *, zvol_state_handle_t **); extern int zvol_resume(zvol_state_handle_t *); extern void *zvol_tag(zvol_state_handle_t *); diff --git a/sys/contrib/openzfs/include/zfs_deleg.h b/sys/contrib/openzfs/include/zfs_deleg.h index f80fe46d35f8..a7bbf1620ad5 100644 --- a/sys/contrib/openzfs/include/zfs_deleg.h +++ b/sys/contrib/openzfs/include/zfs_deleg.h @@ -55,6 +55,7 @@ typedef enum { ZFS_DELEG_NOTE_PROMOTE, ZFS_DELEG_NOTE_RENAME, ZFS_DELEG_NOTE_SEND, + ZFS_DELEG_NOTE_SEND_RAW, ZFS_DELEG_NOTE_RECEIVE, ZFS_DELEG_NOTE_ALLOW, ZFS_DELEG_NOTE_USERPROP, diff --git a/sys/contrib/openzfs/lib/libspl/Makefile.am b/sys/contrib/openzfs/lib/libspl/Makefile.am index 6640ecd582a7..0fd907d3011e 100644 --- a/sys/contrib/openzfs/lib/libspl/Makefile.am +++ b/sys/contrib/openzfs/lib/libspl/Makefile.am @@ -2,6 +2,9 @@ include $(srcdir)/%D%/include/Makefile.am libspl_assert_la_CFLAGS = $(AM_CFLAGS) $(LIBRARY_CFLAGS) $(LIBUNWIND_CFLAGS) libspl_la_CFLAGS = $(libspl_assert_la_CFLAGS) +if TARGET_CPU_I386 +libspl_la_CFLAGS += $(NO_ATOMIC_ALIGNMENT) +endif noinst_LTLIBRARIES += libspl_assert.la libspl.la CPPCHECKTARGETS += libspl_assert.la libspl.la diff --git a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h index a605af962a6d..13cc0b46ac93 100644 --- a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h +++ b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h @@ -33,7 +33,7 @@ #ifdef HAVE_STATX #include <fcntl.h> -#include <linux/stat.h> +#include <sys/stat.h> #endif /* diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/time.h b/sys/contrib/openzfs/lib/libspl/include/sys/time.h index da80a5852ae5..062c6ec979fc 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/time.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/time.h @@ -98,6 +98,15 @@ gethrestime_sec(void) } static inline hrtime_t +getlrtime(void) +{ + struct timeval tv; + (void) gettimeofday(&tv, NULL); + return ((((uint64_t)tv.tv_sec) * NANOSEC) + + ((uint64_t)tv.tv_usec * NSEC_PER_USEC)); +} + +static inline hrtime_t gethrtime(void) { struct timespec ts; diff --git a/sys/contrib/openzfs/lib/libuutil/libuutil.abi b/sys/contrib/openzfs/lib/libuutil/libuutil.abi index 6c736c61e4a5..2a740afa07ca 100644 --- a/sys/contrib/openzfs/lib/libuutil/libuutil.abi +++ b/sys/contrib/openzfs/lib/libuutil/libuutil.abi @@ -616,6 +616,7 @@ <array-type-def dimensions='1' type-id='de572c22' size-in-bits='1472' id='6d3c2f42'> <subrange length='23' type-id='7359adad' id='fdd0f594'/> </array-type-def> + <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> <array-type-def dimensions='1' type-id='3a47d82b' size-in-bits='256' id='a133ec23'> <subrange length='4' type-id='7359adad' id='16fe7105'/> </array-type-def> @@ -1020,13 +1021,6 @@ <array-type-def dimensions='1' type-id='03085adc' size-in-bits='192' id='083f8d58'> <subrange length='3' type-id='7359adad' id='56f209d2'/> </array-type-def> - <array-type-def dimensions='1' type-id='d315442e' size-in-bits='16' id='811205dc'> - <subrange length='1' type-id='7359adad' id='52f813b4'/> - </array-type-def> - <array-type-def dimensions='1' type-id='d3130597' size-in-bits='768' id='f63f23b9'> - <subrange length='12' type-id='7359adad' id='84827bdc'/> - </array-type-def> - <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> <class-decl name='mnttab' size-in-bits='256' is-struct='yes' visibility='default' id='1b055409'> <data-member access='public' layout-offset-in-bits='0'> <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/> @@ -1061,93 +1055,6 @@ <var-decl name='mnt_minor' type-id='3502e3ff' visibility='default'/> </data-member> </class-decl> - <typedef-decl name='__u16' type-id='8efea9e5' id='d315442e'/> - <typedef-decl name='__s32' type-id='95e97e5e' id='3158a266'/> - <typedef-decl name='__u32' type-id='f0981eeb' id='3f1a6b60'/> - <typedef-decl name='__s64' type-id='1eb56b1e' id='49659421'/> - <typedef-decl name='__u64' type-id='3a47d82b' id='d3130597'/> - <class-decl name='statx_timestamp' size-in-bits='128' is-struct='yes' visibility='default' id='94101016'> - <data-member access='public' layout-offset-in-bits='0'> - <var-decl name='tv_sec' type-id='49659421' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='64'> - <var-decl name='tv_nsec' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='96'> - <var-decl name='__reserved' type-id='3158a266' visibility='default'/> - </data-member> - </class-decl> - <class-decl name='statx' size-in-bits='2048' is-struct='yes' visibility='default' id='720b04c5'> - <data-member access='public' layout-offset-in-bits='0'> - <var-decl name='stx_mask' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='32'> - <var-decl name='stx_blksize' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='64'> - <var-decl name='stx_attributes' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='128'> - <var-decl name='stx_nlink' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='160'> - <var-decl name='stx_uid' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='192'> - <var-decl name='stx_gid' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='224'> - <var-decl name='stx_mode' type-id='d315442e' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='240'> - <var-decl name='__spare0' type-id='811205dc' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='256'> - <var-decl name='stx_ino' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='320'> - <var-decl name='stx_size' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='384'> - <var-decl name='stx_blocks' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='448'> - <var-decl name='stx_attributes_mask' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='512'> - <var-decl name='stx_atime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='640'> - <var-decl name='stx_btime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='768'> - <var-decl name='stx_ctime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='896'> - <var-decl name='stx_mtime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1024'> - <var-decl name='stx_rdev_major' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1056'> - <var-decl name='stx_rdev_minor' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1088'> - <var-decl name='stx_dev_major' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1120'> - <var-decl name='stx_dev_minor' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1152'> - <var-decl name='stx_mnt_id' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1216'> - <var-decl name='__spare2' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1280'> - <var-decl name='__spare3' type-id='f63f23b9' visibility='default'/> - </data-member> - </class-decl> <class-decl name='mntent' size-in-bits='320' is-struct='yes' visibility='default' id='56fe4a37'> <data-member access='public' layout-offset-in-bits='0'> <var-decl name='mnt_fsname' type-id='26a90f95' visibility='default'/> @@ -1237,8 +1144,6 @@ <pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/> <pointer-type-def type-id='0bbec9cd' size-in-bits='64' id='62f7a03d'/> <qualified-type-def type-id='62f7a03d' restrict='yes' id='f1cadedf'/> - <pointer-type-def type-id='720b04c5' size-in-bits='64' id='936b8e35'/> - <qualified-type-def type-id='936b8e35' restrict='yes' id='31d265b7'/> <function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='e75a27e9'/> <parameter type-id='3cad23cd'/> @@ -1254,14 +1159,6 @@ <parameter type-id='95e97e5e'/> <return type-id='26a90f95'/> </function-decl> - <function-decl name='statx' visibility='default' binding='global' size-in-bits='64'> - <parameter type-id='95e97e5e'/> - <parameter type-id='9d26089a'/> - <parameter type-id='95e97e5e'/> - <parameter type-id='f0981eeb'/> - <parameter type-id='31d265b7'/> - <return type-id='95e97e5e'/> - </function-decl> <function-decl name='__fprintf_chk' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='e75a27e9'/> <parameter type-id='95e97e5e'/> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index ba161d1ef10f..f988d27a286a 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -571,6 +571,7 @@ <elf-symbol name='zpool_props_refresh' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_read_label' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_refresh_stats' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zpool_refresh_stats_from_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_reguid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_reopen_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_scan' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -641,7 +642,7 @@ <elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='spa_feature_table' size='2632' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> - <elf-symbol name='zfs_deleg_perm_tab' size='528' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zfs_deleg_perm_tab' size='544' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_max_dataset_nesting' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_userquota_prop_prefixes' size='96' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -1458,103 +1459,8 @@ </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libspl/os/linux/getmntany.c' language='LANG_C99'> - <array-type-def dimensions='1' type-id='d315442e' size-in-bits='16' id='811205dc'> - <subrange length='1' type-id='7359adad' id='52f813b4'/> - </array-type-def> - <array-type-def dimensions='1' type-id='d3130597' size-in-bits='768' id='f63f23b9'> - <subrange length='12' type-id='7359adad' id='84827bdc'/> - </array-type-def> - <typedef-decl name='__u16' type-id='8efea9e5' id='d315442e'/> - <typedef-decl name='__s32' type-id='95e97e5e' id='3158a266'/> - <typedef-decl name='__u32' type-id='f0981eeb' id='3f1a6b60'/> - <typedef-decl name='__s64' type-id='1eb56b1e' id='49659421'/> - <typedef-decl name='__u64' type-id='3a47d82b' id='d3130597'/> - <class-decl name='statx_timestamp' size-in-bits='128' is-struct='yes' visibility='default' id='94101016'> - <data-member access='public' layout-offset-in-bits='0'> - <var-decl name='tv_sec' type-id='49659421' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='64'> - <var-decl name='tv_nsec' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='96'> - <var-decl name='__reserved' type-id='3158a266' visibility='default'/> - </data-member> - </class-decl> - <class-decl name='statx' size-in-bits='2048' is-struct='yes' visibility='default' id='720b04c5'> - <data-member access='public' layout-offset-in-bits='0'> - <var-decl name='stx_mask' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='32'> - <var-decl name='stx_blksize' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='64'> - <var-decl name='stx_attributes' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='128'> - <var-decl name='stx_nlink' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='160'> - <var-decl name='stx_uid' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='192'> - <var-decl name='stx_gid' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='224'> - <var-decl name='stx_mode' type-id='d315442e' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='240'> - <var-decl name='__spare0' type-id='811205dc' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='256'> - <var-decl name='stx_ino' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='320'> - <var-decl name='stx_size' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='384'> - <var-decl name='stx_blocks' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='448'> - <var-decl name='stx_attributes_mask' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='512'> - <var-decl name='stx_atime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='640'> - <var-decl name='stx_btime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='768'> - <var-decl name='stx_ctime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='896'> - <var-decl name='stx_mtime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1024'> - <var-decl name='stx_rdev_major' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1056'> - <var-decl name='stx_rdev_minor' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1088'> - <var-decl name='stx_dev_major' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1120'> - <var-decl name='stx_dev_minor' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1152'> - <var-decl name='stx_mnt_id' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1216'> - <var-decl name='__spare2' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1280'> - <var-decl name='__spare3' type-id='f63f23b9' visibility='default'/> - </data-member> - </class-decl> <pointer-type-def type-id='56fe4a37' size-in-bits='64' id='b6b61d2f'/> <qualified-type-def type-id='b6b61d2f' restrict='yes' id='3cad23cd'/> - <pointer-type-def type-id='720b04c5' size-in-bits='64' id='936b8e35'/> - <qualified-type-def type-id='936b8e35' restrict='yes' id='31d265b7'/> <function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='e75a27e9'/> <parameter type-id='3cad23cd'/> @@ -1566,14 +1472,6 @@ <parameter type-id='822cd80b'/> <return type-id='95e97e5e'/> </function-decl> - <function-decl name='statx' visibility='default' binding='global' size-in-bits='64'> - <parameter type-id='95e97e5e'/> - <parameter type-id='9d26089a'/> - <parameter type-id='95e97e5e'/> - <parameter type-id='f0981eeb'/> - <parameter type-id='31d265b7'/> - <return type-id='95e97e5e'/> - </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libspl/timestamp.c' language='LANG_C99'> <typedef-decl name='nl_item' type-id='95e97e5e' id='03b79a94'/> @@ -3194,6 +3092,10 @@ <parameter type-id='dace003f'/> <return type-id='80f4b756'/> </function-decl> + <function-decl name='fnvlist_dup' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='22cce67b'/> + <return type-id='5ce45b60'/> + </function-decl> <function-decl name='fnvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='3fa542f0'/> <return type-id='5ce45b60'/> @@ -3238,6 +3140,11 @@ <parameter type-id='37e3bd22' name='missing'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='zpool_refresh_stats_from_handle' mangled-name='zpool_refresh_stats_from_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_refresh_stats_from_handle'> + <parameter type-id='4c81de99' name='dzhp'/> + <parameter type-id='4c81de99' name='szhp'/> + <return type-id='48b5725f'/> + </function-decl> <function-decl name='zpool_skip_pool' mangled-name='zpool_skip_pool' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_skip_pool'> <parameter type-id='80f4b756' name='poolname'/> <return type-id='c19b74c3'/> @@ -6117,7 +6024,9 @@ <enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/> <enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/> <enumerator name='VDEV_PROP_SLOW_IOS' value='51'/> - <enumerator name='VDEV_NUM_PROPS' value='52'/> + <enumerator name='VDEV_PROP_SIT_OUT' value='52'/> + <enumerator name='VDEV_PROP_AUTOSIT' value='53'/> + <enumerator name='VDEV_NUM_PROPS' value='54'/> </enum-decl> <typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/> <class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'> @@ -9396,10 +9305,6 @@ <parameter type-id='5ce45b60'/> <return type-id='48b5725f'/> </function-decl> - <function-decl name='fnvlist_dup' visibility='default' binding='global' size-in-bits='64'> - <parameter type-id='22cce67b'/> - <return type-id='5ce45b60'/> - </function-decl> <function-decl name='spl_pagesize' mangled-name='spl_pagesize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='spl_pagesize'> <return type-id='b59d7dce'/> </function-decl> @@ -9772,8 +9677,8 @@ </function-decl> </abi-instr> <abi-instr address-size='64' path='module/zcommon/zfs_deleg.c' language='LANG_C99'> - <array-type-def dimensions='1' type-id='fa1870fd' size-in-bits='4224' id='55e705e7'> - <subrange length='33' type-id='7359adad' id='6a5934df'/> + <array-type-def dimensions='1' type-id='fa1870fd' size-in-bits='4352' id='55f84f08'> + <subrange length='34' type-id='7359adad' id='6a6a7e00'/> </array-type-def> <array-type-def dimensions='1' type-id='fa1870fd' size-in-bits='infinite' id='7c00e69d'> <subrange length='infinite' id='031f2035'/> @@ -9803,30 +9708,31 @@ <enumerator name='ZFS_DELEG_NOTE_PROMOTE' value='5'/> <enumerator name='ZFS_DELEG_NOTE_RENAME' value='6'/> <enumerator name='ZFS_DELEG_NOTE_SEND' value='7'/> - <enumerator name='ZFS_DELEG_NOTE_RECEIVE' value='8'/> - <enumerator name='ZFS_DELEG_NOTE_ALLOW' value='9'/> - <enumerator name='ZFS_DELEG_NOTE_USERPROP' value='10'/> - <enumerator name='ZFS_DELEG_NOTE_MOUNT' value='11'/> - <enumerator name='ZFS_DELEG_NOTE_SHARE' value='12'/> - <enumerator name='ZFS_DELEG_NOTE_USERQUOTA' value='13'/> - <enumerator name='ZFS_DELEG_NOTE_GROUPQUOTA' value='14'/> - <enumerator name='ZFS_DELEG_NOTE_USERUSED' value='15'/> - <enumerator name='ZFS_DELEG_NOTE_GROUPUSED' value='16'/> - <enumerator name='ZFS_DELEG_NOTE_USEROBJQUOTA' value='17'/> - <enumerator name='ZFS_DELEG_NOTE_GROUPOBJQUOTA' value='18'/> - <enumerator name='ZFS_DELEG_NOTE_USEROBJUSED' value='19'/> - <enumerator name='ZFS_DELEG_NOTE_GROUPOBJUSED' value='20'/> - <enumerator name='ZFS_DELEG_NOTE_HOLD' value='21'/> - <enumerator name='ZFS_DELEG_NOTE_RELEASE' value='22'/> - <enumerator name='ZFS_DELEG_NOTE_DIFF' value='23'/> - <enumerator name='ZFS_DELEG_NOTE_BOOKMARK' value='24'/> - <enumerator name='ZFS_DELEG_NOTE_LOAD_KEY' value='25'/> - <enumerator name='ZFS_DELEG_NOTE_CHANGE_KEY' value='26'/> - <enumerator name='ZFS_DELEG_NOTE_PROJECTUSED' value='27'/> - <enumerator name='ZFS_DELEG_NOTE_PROJECTQUOTA' value='28'/> - <enumerator name='ZFS_DELEG_NOTE_PROJECTOBJUSED' value='29'/> - <enumerator name='ZFS_DELEG_NOTE_PROJECTOBJQUOTA' value='30'/> - <enumerator name='ZFS_DELEG_NOTE_NONE' value='31'/> + <enumerator name='ZFS_DELEG_NOTE_SEND_RAW' value='8'/> + <enumerator name='ZFS_DELEG_NOTE_RECEIVE' value='9'/> + <enumerator name='ZFS_DELEG_NOTE_ALLOW' value='10'/> + <enumerator name='ZFS_DELEG_NOTE_USERPROP' value='11'/> + <enumerator name='ZFS_DELEG_NOTE_MOUNT' value='12'/> + <enumerator name='ZFS_DELEG_NOTE_SHARE' value='13'/> + <enumerator name='ZFS_DELEG_NOTE_USERQUOTA' value='14'/> + <enumerator name='ZFS_DELEG_NOTE_GROUPQUOTA' value='15'/> + <enumerator name='ZFS_DELEG_NOTE_USERUSED' value='16'/> + <enumerator name='ZFS_DELEG_NOTE_GROUPUSED' value='17'/> + <enumerator name='ZFS_DELEG_NOTE_USEROBJQUOTA' value='18'/> + <enumerator name='ZFS_DELEG_NOTE_GROUPOBJQUOTA' value='19'/> + <enumerator name='ZFS_DELEG_NOTE_USEROBJUSED' value='20'/> + <enumerator name='ZFS_DELEG_NOTE_GROUPOBJUSED' value='21'/> + <enumerator name='ZFS_DELEG_NOTE_HOLD' value='22'/> + <enumerator name='ZFS_DELEG_NOTE_RELEASE' value='23'/> + <enumerator name='ZFS_DELEG_NOTE_DIFF' value='24'/> + <enumerator name='ZFS_DELEG_NOTE_BOOKMARK' value='25'/> + <enumerator name='ZFS_DELEG_NOTE_LOAD_KEY' value='26'/> + <enumerator name='ZFS_DELEG_NOTE_CHANGE_KEY' value='27'/> + <enumerator name='ZFS_DELEG_NOTE_PROJECTUSED' value='28'/> + <enumerator name='ZFS_DELEG_NOTE_PROJECTQUOTA' value='29'/> + <enumerator name='ZFS_DELEG_NOTE_PROJECTOBJUSED' value='30'/> + <enumerator name='ZFS_DELEG_NOTE_PROJECTOBJQUOTA' value='31'/> + <enumerator name='ZFS_DELEG_NOTE_NONE' value='32'/> </enum-decl> <typedef-decl name='zfs_deleg_note_t' type-id='729d4547' id='4613c173'/> <class-decl name='zfs_deleg_perm_tab' size-in-bits='128' is-struct='yes' visibility='default' id='5aa05c1f'> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_config.c b/sys/contrib/openzfs/lib/libzfs/libzfs_config.c index 0d2102191389..9d704e4303ff 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_config.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_config.c @@ -308,6 +308,23 @@ zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing) } /* + * Copies the pool config and state from szhp to dzhp. szhp and dzhp must + * represent the same pool. Used by pool_list_refresh() to avoid another + * round-trip into the kernel to get stats already collected earlier in the + * function. + */ +void +zpool_refresh_stats_from_handle(zpool_handle_t *dzhp, zpool_handle_t *szhp) +{ + VERIFY0(strcmp(dzhp->zpool_name, szhp->zpool_name)); + nvlist_free(dzhp->zpool_old_config); + dzhp->zpool_old_config = dzhp->zpool_config; + dzhp->zpool_config = fnvlist_dup(szhp->zpool_config); + dzhp->zpool_config_size = szhp->zpool_config_size; + dzhp->zpool_state = szhp->zpool_state; +} + +/* * The following environment variables are undocumented * and should be used for testing purposes only: * diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c index 10b42720e963..ce154ae1a4cd 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c @@ -5549,6 +5549,8 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, /* Only use if provided by the RAIDZ VDEV above */ if (prop == VDEV_PROP_RAIDZ_EXPANDING) return (ENOENT); + if (prop == VDEV_PROP_SIT_OUT) + return (ENOENT); } if (vdev_prop_index_to_string(prop, intval, (const char **)&strval) != 0) @@ -5718,8 +5720,16 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname, nvlist_free(nvl); nvlist_free(outnvl); - if (ret) - (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf); + if (ret) { + if (errno == ENOTSUP) { + zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, + "property not supported for this vdev")); + (void) zfs_error(zhp->zpool_hdl, EZFS_PROPTYPE, errbuf); + } else { + (void) zpool_standard_error(zhp->zpool_hdl, errno, + errbuf); + } + } return (ret); } diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c index 4edddc2a759b..26f5135dff62 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c @@ -776,6 +776,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_ASHIFT_MISMATCH: zfs_verror(hdl, EZFS_ASHIFT_MISMATCH, fmt, ap); break; + case ZFS_ERR_TOO_MANY_SITOUTS: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "too many disks " + "already sitting out")); + zfs_verror(hdl, EZFS_BUSY, fmt, ap); + break; default: zfs_error_aux(hdl, "%s", zfs_strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); diff --git a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi index 7464b3adb254..263cad045f7a 100644 --- a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi +++ b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi @@ -617,6 +617,7 @@ <array-type-def dimensions='1' type-id='de572c22' size-in-bits='1472' id='6d3c2f42'> <subrange length='23' type-id='7359adad' id='fdd0f594'/> </array-type-def> + <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> <array-type-def dimensions='1' type-id='3a47d82b' size-in-bits='256' id='a133ec23'> <subrange length='4' type-id='7359adad' id='16fe7105'/> </array-type-def> @@ -988,13 +989,6 @@ </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libspl/os/linux/getmntany.c' language='LANG_C99'> - <array-type-def dimensions='1' type-id='d315442e' size-in-bits='16' id='811205dc'> - <subrange length='1' type-id='7359adad' id='52f813b4'/> - </array-type-def> - <array-type-def dimensions='1' type-id='d3130597' size-in-bits='768' id='f63f23b9'> - <subrange length='12' type-id='7359adad' id='84827bdc'/> - </array-type-def> - <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> <class-decl name='mnttab' size-in-bits='256' is-struct='yes' visibility='default' id='1b055409'> <data-member access='public' layout-offset-in-bits='0'> <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/> @@ -1029,93 +1023,6 @@ <var-decl name='mnt_minor' type-id='3502e3ff' visibility='default'/> </data-member> </class-decl> - <typedef-decl name='__u16' type-id='8efea9e5' id='d315442e'/> - <typedef-decl name='__s32' type-id='95e97e5e' id='3158a266'/> - <typedef-decl name='__u32' type-id='f0981eeb' id='3f1a6b60'/> - <typedef-decl name='__s64' type-id='1eb56b1e' id='49659421'/> - <typedef-decl name='__u64' type-id='3a47d82b' id='d3130597'/> - <class-decl name='statx_timestamp' size-in-bits='128' is-struct='yes' visibility='default' id='94101016'> - <data-member access='public' layout-offset-in-bits='0'> - <var-decl name='tv_sec' type-id='49659421' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='64'> - <var-decl name='tv_nsec' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='96'> - <var-decl name='__reserved' type-id='3158a266' visibility='default'/> - </data-member> - </class-decl> - <class-decl name='statx' size-in-bits='2048' is-struct='yes' visibility='default' id='720b04c5'> - <data-member access='public' layout-offset-in-bits='0'> - <var-decl name='stx_mask' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='32'> - <var-decl name='stx_blksize' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='64'> - <var-decl name='stx_attributes' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='128'> - <var-decl name='stx_nlink' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='160'> - <var-decl name='stx_uid' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='192'> - <var-decl name='stx_gid' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='224'> - <var-decl name='stx_mode' type-id='d315442e' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='240'> - <var-decl name='__spare0' type-id='811205dc' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='256'> - <var-decl name='stx_ino' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='320'> - <var-decl name='stx_size' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='384'> - <var-decl name='stx_blocks' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='448'> - <var-decl name='stx_attributes_mask' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='512'> - <var-decl name='stx_atime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='640'> - <var-decl name='stx_btime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='768'> - <var-decl name='stx_ctime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='896'> - <var-decl name='stx_mtime' type-id='94101016' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1024'> - <var-decl name='stx_rdev_major' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1056'> - <var-decl name='stx_rdev_minor' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1088'> - <var-decl name='stx_dev_major' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1120'> - <var-decl name='stx_dev_minor' type-id='3f1a6b60' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1152'> - <var-decl name='stx_mnt_id' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1216'> - <var-decl name='__spare2' type-id='d3130597' visibility='default'/> - </data-member> - <data-member access='public' layout-offset-in-bits='1280'> - <var-decl name='__spare3' type-id='f63f23b9' visibility='default'/> - </data-member> - </class-decl> <class-decl name='mntent' size-in-bits='320' is-struct='yes' visibility='default' id='56fe4a37'> <data-member access='public' layout-offset-in-bits='0'> <var-decl name='mnt_fsname' type-id='26a90f95' visibility='default'/> @@ -1191,8 +1098,6 @@ <pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/> <pointer-type-def type-id='0bbec9cd' size-in-bits='64' id='62f7a03d'/> <qualified-type-def type-id='62f7a03d' restrict='yes' id='f1cadedf'/> - <pointer-type-def type-id='720b04c5' size-in-bits='64' id='936b8e35'/> - <qualified-type-def type-id='936b8e35' restrict='yes' id='31d265b7'/> <function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='e75a27e9'/> <parameter type-id='3cad23cd'/> @@ -1208,14 +1113,6 @@ <parameter type-id='95e97e5e'/> <return type-id='26a90f95'/> </function-decl> - <function-decl name='statx' visibility='default' binding='global' size-in-bits='64'> - <parameter type-id='95e97e5e'/> - <parameter type-id='9d26089a'/> - <parameter type-id='95e97e5e'/> - <parameter type-id='f0981eeb'/> - <parameter type-id='31d265b7'/> - <return type-id='95e97e5e'/> - </function-decl> <function-decl name='stat64' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='9d26089a'/> <parameter type-id='f1cadedf'/> diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index fea2f81458f9..8ed374627264 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -1238,7 +1238,7 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) */ int zfs_file_pwrite(zfs_file_t *fp, const void *buf, - size_t count, loff_t pos, ssize_t *resid) + size_t count, loff_t pos, uint8_t ashift, ssize_t *resid) { ssize_t rc, split, done; int sectors; @@ -1248,8 +1248,8 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf, * system calls so that the process can be killed in between. * This is used by ztest to simulate realistic failure modes. */ - sectors = count >> SPA_MINBLOCKSHIFT; - split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT; + sectors = count >> ashift; + split = (sectors > 0 ? rand() % sectors : 0) << ashift; rc = pwrite64(fp->f_fd, buf, split, pos); if (rc != -1) { done = rc; diff --git a/sys/contrib/openzfs/man/Makefile.am b/sys/contrib/openzfs/man/Makefile.am index 6a7b2d3e46b7..7a63641c1c39 100644 --- a/sys/contrib/openzfs/man/Makefile.am +++ b/sys/contrib/openzfs/man/Makefile.am @@ -2,7 +2,7 @@ dist_noinst_man_MANS = \ %D%/man1/cstyle.1 dist_man_MANS = \ - %D%/man1/arcstat.1 \ + %D%/man1/zarcstat.1 \ %D%/man1/raidz_test.1 \ %D%/man1/test-runner.1 \ %D%/man1/zhack.1 \ @@ -124,10 +124,21 @@ dist_noinst_DATA += $(dist_noinst_man_MANS) $(dist_man_MANS) SUBSTFILES += $(nodist_man_MANS) -CHECKS += mancheck -mancheck: - $(top_srcdir)/scripts/mancheck.sh $(srcdir)/%D% +MANFILES = $(dist_noinst_man_MANS) $(dist_man_MANS) $(nodist_man_MANS) + +PHONY += mancheck + +mancheck_verbose = $(mancheck_verbose_@AM_V@) +mancheck_verbose_ = $(mancheck_verbose_@AM_DEFAULT_V@) +mancheck_verbose_0 = @echo MANCHECK $(_MTGT); +_MTGT = $(subst ^,/,$(subst mancheck-,,$@)) +mancheck-%: + $(mancheck_verbose)scripts/mancheck.sh $(_MTGT) + +mancheck: $(foreach manfile, $(MANFILES), $(addprefix mancheck-,$(subst /,^,$(manfile)))) + +CHECKS += mancheck if BUILD_LINUX # The manual pager in most Linux distros defaults to "BSD" when .Os is blank, diff --git a/sys/contrib/openzfs/man/man1/cstyle.1 b/sys/contrib/openzfs/man/man1/cstyle.1 index 241c82edd5a8..8f29129ce175 100644 --- a/sys/contrib/openzfs/man/man1/cstyle.1 +++ b/sys/contrib/openzfs/man/man1/cstyle.1 @@ -21,7 +21,7 @@ .\" .\" CDDL HEADER END .\" -.Dd May 26, 2021 +.Dd April 4, 2022 .Dt CSTYLE 1 .Os . diff --git a/sys/contrib/openzfs/man/man1/arcstat.1 b/sys/contrib/openzfs/man/man1/zarcstat.1 index f2474fbb701f..3633c5d417fe 100644 --- a/sys/contrib/openzfs/man/man1/arcstat.1 +++ b/sys/contrib/openzfs/man/man1/zarcstat.1 @@ -13,12 +13,12 @@ .\" Copyright (c) 2015 by Delphix. All rights reserved. .\" Copyright (c) 2020 by AJ Jordan. All rights reserved. .\" -.Dd December 23, 2022 -.Dt ARCSTAT 1 +.Dd September 19, 2024 +.Dt ZARCSTAT 1 .Os . .Sh NAME -.Nm arcstat +.Nm zarcstat .Nd report ZFS ARC and L2ARC statistics .Sh SYNOPSIS .Nm diff --git a/sys/contrib/openzfs/man/man1/zhack.1 b/sys/contrib/openzfs/man/man1/zhack.1 index f58c0527649b..63658cf930e9 100644 --- a/sys/contrib/openzfs/man/man1/zhack.1 +++ b/sys/contrib/openzfs/man/man1/zhack.1 @@ -23,7 +23,7 @@ .\" .\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS .\" -.Dd May 26, 2021 +.Dd May 3, 2023 .Dt ZHACK 1 .Os . @@ -122,6 +122,24 @@ Example: .Nm zhack Cm label repair Fl cu Ar device Fix checksums and undetach a device . +.It Xo +.Nm zhack +.Cm metaslab leak +.Op Fl f +.Ar pool +.Xc +Apply a fragmentation profile generated by +.Sy zdb +to the specified +.Ar pool Ns +\&. +.Pp +The +.Fl f +flag forces the profile to apply even if the vdevs in the +.Ar pool +don't have the same number of metaslabs as the fragmentation profile. +. .El . .Sh GLOBAL OPTIONS @@ -143,6 +161,8 @@ Search for members in .Ar dir . Can be specified more than once. +.It Fl o Ar var Ns = Ns Ar value +Set the given tunable to the provided value. .El . .Sh EXAMPLES diff --git a/sys/contrib/openzfs/man/man1/ztest.1 b/sys/contrib/openzfs/man/man1/ztest.1 index febbb62b1664..ae857bfea29c 100644 --- a/sys/contrib/openzfs/man/man1/ztest.1 +++ b/sys/contrib/openzfs/man/man1/ztest.1 @@ -24,7 +24,7 @@ .\" reserved. .\" Copyright (c) 2017, Intel Corporation. .\" -.Dd May 26, 2021 +.Dd July 12, 2025 .Dt ZTEST 1 .Os . diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4 index 683f8e2b631f..61dfe42e463d 100644 --- a/sys/contrib/openzfs/man/man4/spl.4 +++ b/sys/contrib/openzfs/man/man4/spl.4 @@ -15,7 +15,7 @@ .\" .\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved. .\" -.Dd August 24, 2020 +.Dd May 7, 2025 .Dt SPL 4 .Os . diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 5c7958667f92..11bcbf430210 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -4,6 +4,7 @@ .\" Copyright (c) 2019, 2021 by Delphix. All rights reserved. .\" Copyright (c) 2019 Datto Inc. .\" Copyright (c) 2023, 2024, 2025, Klara, Inc. +.\" .\" The contents of this file are subject to the terms of the Common Development .\" and Distribution License (the "License"). You may not use this file except .\" in compliance with the License. You can obtain a copy of the license at @@ -17,7 +18,7 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd May 29, 2025 +.Dd September 15, 2025 .Dt ZFS 4 .Os . @@ -601,6 +602,42 @@ new format when enabling the feature. The default is to convert all log entries. . +.It Sy vdev_read_sit_out_secs Ns = Ns Sy 600 Ns s Po 10 min Pc Pq ulong +When a slow disk outlier is detected it is placed in a sit out state. +While sitting out the disk will not participate in normal reads, instead its +data will be reconstructed as needed from parity. +Scrub operations will always read from a disk, even if it's sitting out. +A number of disks in a RAID-Z or dRAID vdev may sit out at the same time, up +to the number of parity devices. +Writes will still be issued to a disk which is sitting out to maintain full +redundancy. +Defaults to 600 seconds and a value of zero disables disk sit-outs in general, +including slow disk outlier detection. +. +.It Sy vdev_raidz_outlier_check_interval_ms Ns = Ns Sy 1000 Ns ms Po 1 sec Pc Pq ulong +How often each RAID-Z and dRAID vdev will check for slow disk outliers. +Increasing this interval will reduce the sensitivity of detection (since all +I/Os since the last check are included in the statistics), but will slow the +response to a disk developing a problem. +Defaults to once per second; setting extremely small values may cause negative +performance effects. +. +.It Sy vdev_raidz_outlier_insensitivity Ns = Ns Sy 50 Pq uint +When performing slow outlier checks for RAID-Z and dRAID vdevs, this value is +used to determine how far out an outlier must be before it counts as an event +worth consdering. +This is phrased as "insensitivity" because larger values result in fewer +detections. +Smaller values will result in more aggressive sitting out of disks that may have +problems, but may significantly increase the rate of spurious sit-outs. +.Pp +To provide a more technical definition of this parameter, this is the multiple +of the inter-quartile range (IQR) that is being used in a Tukey's Fence +detection algorithm. +This is much higher than a normal Tukey's Fence k-value, because the +distribution under consideration is probably an extreme-value distribution, +rather than a more typical Gaussian distribution. +. .It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32 KiB Pc Pq uint During top-level vdev removal, chunks of data are copied from the vdev which may include free space in order to trade bandwidth for IOPS. @@ -2546,6 +2583,49 @@ the xattr so as to not accumulate duplicates. .It Sy zio_requeue_io_start_cut_in_line Ns = Ns Sy 0 Ns | Ns 1 Pq int Prioritize requeued I/O. . +.It Sy zfs_delete_inode Ns = Ns Sy 0 Ns | Ns 1 Pq int +Sets whether the kernel should free an inode structure when the last reference +is released, or cache it in memory. +Intended for testing/debugging. +.Pp +A live inode structure "pins" versious internal OpenZFS structures in memory, +which can result in large amounts of "unusable" memory on systems with lots of +infrequently-accessed files, until the kernel's memory pressure mechanism +asks OpenZFS to release them. +.Pp +The default value of +.Sy 0 +always caches inodes that appear to still exist on disk. +Setting it to +.Sy 1 +will immediately release unused inodes and their associated memory back to the +dbuf cache or the ARC for reuse, but may reduce performance if inodes are +frequently evicted and reloaded. +.Pp +This parameter is only available on Linux. +. +.It Sy zfs_delete_dentry Ns = Ns Sy 0 Ns | Ns 1 Pq int +Sets whether the kernel should free a dentry structure when it is no longer +required, or hold it in the dentry cache. +Intended for testing/debugging. +. +Since a dentry structure holds an inode reference, a cached dentry can "pin" +an inode in memory indefinitely, along with associated OpenZFS structures (See +.Sy zfs_delete_inode ) . +.Pp +The default value of +.Sy 0 +instructs the kernel to cache entries and their associated inodes when they +are no longer directly referenced. +They will be reclaimed as part of the kernel's normal cache management +processes. +Setting it to +.Sy 1 +will instruct the kernel to release directory entries and their inodes as soon +as they are no longer referenced by the filesystem. +.Pp +This parameter is only available on Linux. +. .It Sy zio_taskq_batch_pct Ns = Ns Sy 80 Ns % Pq uint Percentage of online CPUs which will run a worker thread for I/O. These workers are responsible for I/O work such as compression, encryption, diff --git a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 b/sys/contrib/openzfs/man/man5/vdev_id.conf.5 index d2f817631c15..299a23720201 100644 --- a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 +++ b/sys/contrib/openzfs/man/man5/vdev_id.conf.5 @@ -9,7 +9,7 @@ .\" source. A copy of the CDDL is also available via the Internet at .\" http://www.illumos.org/license/CDDL. .\" -.Dd May 26, 2021 +.Dd October 8, 2024 .Dt VDEV_ID.CONF 5 .Os . diff --git a/sys/contrib/openzfs/man/man7/dracut.zfs.7 b/sys/contrib/openzfs/man/man7/dracut.zfs.7 index fb5da553af6e..3d051d4d3343 100644 --- a/sys/contrib/openzfs/man/man7/dracut.zfs.7 +++ b/sys/contrib/openzfs/man/man7/dracut.zfs.7 @@ -1,7 +1,7 @@ .\" SPDX-License-Identifier: CDDL-1.0 .\" SPDX-License-Identifier: 0BSD .\" -.Dd March 28, 2023 +.Dd July 13, 2024 .Dt DRACUT.ZFS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7 index acabe6b6613a..0fb28d7db13c 100644 --- a/sys/contrib/openzfs/man/man7/vdevprops.7 +++ b/sys/contrib/openzfs/man/man7/vdevprops.7 @@ -19,9 +19,9 @@ .\" .\" CDDL HEADER END .\" -.\" Copyright (c) 2021 Klara, Inc. +.\" Copyright (c) 2021, 2025, Klara, Inc. .\" -.Dd October 30, 2022 +.Dd July 23, 2024 .Dt VDEVPROPS 7 .Os . @@ -106,11 +106,17 @@ The number of children belonging to this vdev .It Sy read_errors , write_errors , checksum_errors , initialize_errors , trim_errors The number of errors of each type encountered by this vdev .It Sy slow_ios -The number of slow I/Os encountered by this vdev, -These represent I/O operations that didn't complete in +This indicates the number of slow I/O operations encountered by this vdev. +A slow I/O is defined as an operation that did not complete within the .Sy zio_slow_io_ms -milliseconds +threshold in milliseconds .Pq Sy 30000 No by default . +For +.Sy RAIDZ +and +.Sy DRAID +configurations, this value also represents the number of times the vdev was +identified as an outlier and excluded from participating in read I/O operations. .It Sy null_ops , read_ops , write_ops , free_ops , claim_ops , trim_ops The number of I/O operations of each type performed by this vdev .It Xo @@ -150,6 +156,31 @@ The amount of space to reserve for the EFI system partition .It Sy failfast If this device should propagate BIO errors back to ZFS, used to disable failfast. +.It Sy sit_out +Only valid for +.Sy RAIDZ +and +.Sy DRAID +vdevs. +True when a slow disk outlier was detected and the vdev is currently in a sit +out state. +This property can be manually set to cause vdevs to sit out. +It will also be automatically set by the +.Sy autosit +logic if that is enabled. +While sitting out, the vdev will not participate in normal reads, instead its +data will be reconstructed as needed from parity. +.It Sy autosit +Only valid for +.Sy RAIDZ +and +.Sy DRAID +vdevs. +If set, this enables the kernel-level slow disk detection logic. +This logic automatically causes any vdevs that are significant negative +performance outliers to sit out, as described in the +.Sy sit_out +property. .It Sy path The path to the device for this vdev .It Sy allocating diff --git a/sys/contrib/openzfs/man/man7/zfsconcepts.7 b/sys/contrib/openzfs/man/man7/zfsconcepts.7 index 5c736e53670d..bb2178d85bcd 100644 --- a/sys/contrib/openzfs/man/man7/zfsconcepts.7 +++ b/sys/contrib/openzfs/man/man7/zfsconcepts.7 @@ -31,7 +31,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright 2023 Klara, Inc. .\" -.Dd October 6, 2023 +.Dd October 2, 2024 .Dt ZFSCONCEPTS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7 index ac3152cb5d51..77e994b912b6 100644 --- a/sys/contrib/openzfs/man/man7/zfsprops.7 +++ b/sys/contrib/openzfs/man/man7/zfsprops.7 @@ -39,7 +39,7 @@ .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP. .\" -.Dd June 29, 2024 +.Dd September 13, 2025 .Dt ZFSPROPS 7 .Os . @@ -1192,18 +1192,26 @@ keylocation can be with either .Nm zfs Cm set or .Nm zfs Cm change-key . +.Pp If .Sy prompt -is selected ZFS will ask for the key at the command prompt when it is required -to access the encrypted data (see +is selected, ZFS will expect the key to be provided when it is required to +access the encrypted data (see .Nm zfs Cm load-key for details). -This setting will also allow the key to be passed in via the standard input -stream, -but users should be careful not to place keys which should be kept secret on -the command line. -If a file URI is selected, the key will be loaded from the +If stdin is a TTY, then ZFS will ask for the key to be provided. +Otherwise, stdin is expected to be the key to use and will be processed as such. +Users should be careful not to place keys which should be kept secret on the +command line, as most operating systems may expose command line arguments to +other processes. +If the +.Dq raw +.Sy keyformat +was used, then the key must be provided via stdin. +.Pp +If a file URL is selected, the key will be loaded from the specified absolute file path. +.Pp If an HTTPS or HTTP URL is selected, it will be GETted using .Xr fetch 3 , libcurl, or nothing, depending on compile-time configuration and run-time diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7 index 10dfd1f92936..b4404a6eb58d 100644 --- a/sys/contrib/openzfs/man/man7/zpool-features.7 +++ b/sys/contrib/openzfs/man/man7/zpool-features.7 @@ -19,7 +19,7 @@ .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" -.Dd October 2, 2024 +.Dd July 23, 2025 .Dt ZPOOL-FEATURES 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 b/sys/contrib/openzfs/man/man7/zpoolconcepts.7 index dafe3bffc453..b9c8926d835d 100644 --- a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 +++ b/sys/contrib/openzfs/man/man7/zpoolconcepts.7 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd April 7, 2023 +.Dd August 6, 2025 .Dt ZPOOLCONCEPTS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zpoolprops.7 b/sys/contrib/openzfs/man/man7/zpoolprops.7 index 5d84753193ee..d3b4c2376943 100644 --- a/sys/contrib/openzfs/man/man7/zpoolprops.7 +++ b/sys/contrib/openzfs/man/man7/zpoolprops.7 @@ -29,7 +29,7 @@ .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" Copyright (c) 2023, Klara Inc. .\" -.Dd November 18, 2024 +.Dd December 4, 2024 .Dt ZPOOLPROPS 7 .Os . diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index 0a5b6af73fdb..c3290ea14769 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -15,7 +15,7 @@ .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC. .\" Copyright (c) 2017 Intel Corporation. .\" -.Dd April 23, 2025 +.Dd August 12, 2025 .Dt ZDB 8 .Os . @@ -69,6 +69,13 @@ .Op Fl U Ar cache .Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns … .Nm +.Fl -allocated-map +.Op Fl mAFLPXY +.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns … +.Op Fl t Ar txg +.Op Fl U Ar cache +.Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns … +.Nm .Fl O .Op Fl K Ar key .Ar dataset path @@ -128,6 +135,11 @@ that zdb may interpret inconsistent pool data and behave erratically. .Sh OPTIONS Display options: .Bl -tag -width Ds +.It Fl Sy -allocated-map +Prints out a list of all the allocated regions in the pool. +Primarily intended for use with the +.Nm zhack metaslab leak +subcommand. .It Fl b , -block-stats Display statistics regarding the number, size .Pq logical, physical and allocated diff --git a/sys/contrib/openzfs/man/man8/zed.8.in b/sys/contrib/openzfs/man/man8/zed.8.in index c90a1834403b..2d19f2d8496b 100644 --- a/sys/contrib/openzfs/man/man8/zed.8.in +++ b/sys/contrib/openzfs/man/man8/zed.8.in @@ -13,7 +13,7 @@ .\" .\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) .\" -.Dd May 26, 2021 +.Dd August 22, 2022 .Dt ZED 8 .Os . @@ -158,6 +158,8 @@ Multiple ZEDLETs may be invoked for a given zevent. ZEDLETs are executables invoked by the ZED in response to a given zevent. They should be written under the presumption they can be invoked concurrently, and they should use appropriate locking to access any shared resources. +The one exception to this are "synchronous zedlets", which are described later +in this page. Common variables used by ZEDLETs can be stored in the default rc file which is sourced by scripts; these variables should be prefixed with .Sy ZED_ . @@ -233,6 +235,36 @@ and .Sy ZPOOL . These variables may be overridden in the rc file. . +.Sh Synchronous ZEDLETS +ZED's normal behavior is to spawn off zedlets in parallel and ignore their +completion order. +This means that ZED can potentially +have zedlets for event ID number 2 starting before zedlets for event ID number +1 have finished. +Most of the time this is fine, and it actually helps when the system is getting +hammered with hundreds of events. +.Pp +However, there are times when you want your zedlets to be executed in sequence +with the event ID. +That is where synchronous zedlets come in. +.Pp +ZED will wait for all previously spawned zedlets to finish before running +a synchronous zedlet. +Synchronous zedlets are guaranteed to be the only +zedlet running. +No other zedlets may run in parallel with a synchronous zedlet. +Users should be careful to only use synchronous zedlets when needed, since +they decrease parallelism. +.Pp +To make a zedlet synchronous, simply add a "-sync-" immediately following the +event name in the zedlet's file name: +.Pp +.Sy EVENT_NAME-sync-ZEDLETNAME.sh +.Pp +For example, if you wanted a synchronous statechange script: +.Pp +.Sy statechange-sync-myzedlet.sh +. .Sh FILES .Bl -tag -width "-c" .It Pa @sysconfdir@/zfs/zed.d diff --git a/sys/contrib/openzfs/man/man8/zfs-allow.8 b/sys/contrib/openzfs/man/man8/zfs-allow.8 index 5a8e80bf6a43..e3b0e1ab3e12 100644 --- a/sys/contrib/openzfs/man/man8/zfs-allow.8 +++ b/sys/contrib/openzfs/man/man8/zfs-allow.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd September 8, 2025 .Dt ZFS-ALLOW 8 .Os . @@ -212,7 +212,8 @@ receive subcommand Must also have the \fBmount\fR and \fBcreate\fR ability, requ release subcommand Allows releasing a user hold which might destroy the snapshot rename subcommand Must also have the \fBmount\fR and \fBcreate\fR ability in the new parent rollback subcommand Must also have the \fBmount\fR ability -send subcommand +send subcommand Allows sending a replication stream of a dataset. +send:raw subcommand Only allows sending raw replication streams, preventing encrypted datasets being sent in decrypted form. share subcommand Allows sharing file systems over NFS or SMB protocols snapshot subcommand Must also have the \fBmount\fR ability diff --git a/sys/contrib/openzfs/man/man8/zfs-bookmark.8 b/sys/contrib/openzfs/man/man8/zfs-bookmark.8 index 083ff46d241b..5a0933820020 100644 --- a/sys/contrib/openzfs/man/man8/zfs-bookmark.8 +++ b/sys/contrib/openzfs/man/man8/zfs-bookmark.8 @@ -31,7 +31,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2019, 2020 by Christian Schwarz. All Rights Reserved. .\" -.Dd May 12, 2022 +.Dd July 11, 2022 .Dt ZFS-BOOKMARK 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-clone.8 b/sys/contrib/openzfs/man/man8/zfs-clone.8 index cd412815f5fe..9609cf2ce36a 100644 --- a/sys/contrib/openzfs/man/man8/zfs-clone.8 +++ b/sys/contrib/openzfs/man/man8/zfs-clone.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-CLONE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-create.8 b/sys/contrib/openzfs/man/man8/zfs-create.8 index 91878056cc7d..58bde5799240 100644 --- a/sys/contrib/openzfs/man/man8/zfs-create.8 +++ b/sys/contrib/openzfs/man/man8/zfs-create.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd June 2, 2023 .Dt ZFS-CREATE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-destroy.8 b/sys/contrib/openzfs/man/man8/zfs-destroy.8 index 38359be02430..6a6791f7a44e 100644 --- a/sys/contrib/openzfs/man/man8/zfs-destroy.8 +++ b/sys/contrib/openzfs/man/man8/zfs-destroy.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd February 5, 2025 .Dt ZFS-DESTROY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-diff.8 b/sys/contrib/openzfs/man/man8/zfs-diff.8 index d4c48f4109be..5b94ea524666 100644 --- a/sys/contrib/openzfs/man/man8/zfs-diff.8 +++ b/sys/contrib/openzfs/man/man8/zfs-diff.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-DIFF 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-hold.8 b/sys/contrib/openzfs/man/man8/zfs-hold.8 index 0c88937f0dc8..a877e428f88b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-hold.8 +++ b/sys/contrib/openzfs/man/man8/zfs-hold.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd June 30, 2019 +.Dd November 8, 2022 .Dt ZFS-HOLD 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-jail.8 b/sys/contrib/openzfs/man/man8/zfs-jail.8 index 53499a279d05..569f5f57eab4 100644 --- a/sys/contrib/openzfs/man/man8/zfs-jail.8 +++ b/sys/contrib/openzfs/man/man8/zfs-jail.8 @@ -37,7 +37,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZFS-JAIL 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8 index 677d8292e207..42eff94f9762 100644 --- a/sys/contrib/openzfs/man/man8/zfs-list.8 +++ b/sys/contrib/openzfs/man/man8/zfs-list.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd February 8, 2024 +.Dd August 25, 2025 .Dt ZFS-LIST 8 .Os . @@ -50,27 +50,25 @@ .Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns … . .Sh DESCRIPTION -If specified, you can list property information by the absolute pathname or the -relative pathname. -By default, all file systems and volumes are displayed. +By default, all file systems and volumes are displayed, with the following +fields: +.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint . Snapshots are displayed if the .Sy listsnapshots pool property is .Sy on .Po the default is .Sy off -.Pc , +.Pc or if the .Fl t Sy snapshot or .Fl t Sy all options are specified. -The following fields are displayed: -.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint . .Bl -tag -width "-H" .It Fl H Used for scripting mode. -Do not print headers and separate fields by a single tab instead of arbitrary +Do not print headers, and separate fields by a single tab instead of arbitrary white space. .It Fl j , -json Op Ar --json-int Print the output in JSON format. @@ -87,7 +85,7 @@ of will display only the dataset and its direct children. .It Fl o Ar property A comma-separated list of properties to display. -The property must be: +Each property must be: .Bl -bullet -compact .It One of the properties described in the @@ -125,30 +123,41 @@ section of or the value .Sy name to sort by the dataset name. -Multiple properties can be specified at one time using multiple +Multiple properties can be specified to operate together using multiple .Fl s -property options. +or +.Fl S +options. Multiple .Fl s -options are evaluated from left to right in decreasing order of importance. -The following is a list of sorting criteria: +and +.Fl S +options are evaluated from left to right to supply sort keys in +decreasing order of priority. +Property types operate as follows: .Bl -bullet -compact .It Numeric types sort in numeric order. .It String types sort in alphabetical order. .It -Types inappropriate for a row sort that row to the literal bottom, regardless of -the specified ordering. +Types inappropriate for a row sort that row to the literal bottom, +regardless of the specified ordering. .El .Pp -If no sorting options are specified the existing behavior of -.Nm zfs Cm list -is preserved. +If no sort columns are specified, or if two lines of output would sort +equally across all specified columns, then datasets and bookmarks are +sorted by name, whereas snapshots are sorted first by the name of their +dataset and then by the time of their creation. +When no sort columns are specified but snapshots are listed, this +default behavior causes snapshots to be grouped under their datasets in +chronological order by creation time. .It Fl S Ar property Same as .Fl s , -but sorts by property in descending order. +but sorts by +.Ar property +in descending order. .It Fl t Ar type A comma-separated list of types to display, where .Ar type diff --git a/sys/contrib/openzfs/man/man8/zfs-load-key.8 b/sys/contrib/openzfs/man/man8/zfs-load-key.8 index 7838c46d9e77..3a11cea99fd6 100644 --- a/sys/contrib/openzfs/man/man8/zfs-load-key.8 +++ b/sys/contrib/openzfs/man/man8/zfs-load-key.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd January 13, 2020 +.Dd July 11, 2022 .Dt ZFS-LOAD-KEY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in index ea470247daac..9e44ea30c636 100644 --- a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in +++ b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in @@ -23,7 +23,7 @@ .\" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION .\" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. .\" -.Dd May 31, 2021 +.Dd November 30, 2021 .Dt ZFS-MOUNT-GENERATOR 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-mount.8 b/sys/contrib/openzfs/man/man8/zfs-mount.8 index 9fca6fffd5bb..2689b6dc345b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-mount.8 +++ b/sys/contrib/openzfs/man/man8/zfs-mount.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd February 16, 2019 +.Dd October 12, 2024 .Dt ZFS-MOUNT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-project.8 b/sys/contrib/openzfs/man/man8/zfs-project.8 index 36547680f53e..4ebfdf6ffe4f 100644 --- a/sys/contrib/openzfs/man/man8/zfs-project.8 +++ b/sys/contrib/openzfs/man/man8/zfs-project.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZFS-PROJECT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-promote.8 b/sys/contrib/openzfs/man/man8/zfs-promote.8 index 767045812607..435a7a5d0144 100644 --- a/sys/contrib/openzfs/man/man8/zfs-promote.8 +++ b/sys/contrib/openzfs/man/man8/zfs-promote.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-PROMOTE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-rename.8 b/sys/contrib/openzfs/man/man8/zfs-rename.8 index 4cf192c0682b..8fedc67469e6 100644 --- a/sys/contrib/openzfs/man/man8/zfs-rename.8 +++ b/sys/contrib/openzfs/man/man8/zfs-rename.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-RENAME 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-rewrite.8 b/sys/contrib/openzfs/man/man8/zfs-rewrite.8 index a3a037f3794a..ca5340c7e5eb 100644 --- a/sys/contrib/openzfs/man/man8/zfs-rewrite.8 +++ b/sys/contrib/openzfs/man/man8/zfs-rewrite.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2025 iXsystems, Inc. .\" -.Dd May 6, 2025 +.Dd July 23, 2025 .Dt ZFS-REWRITE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8 index f7c6b840303c..6c5f6b94afd5 100644 --- a/sys/contrib/openzfs/man/man8/zfs-send.8 +++ b/sys/contrib/openzfs/man/man8/zfs-send.8 @@ -31,7 +31,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2024, Klara, Inc. .\" -.Dd October 2, 2024 +.Dd August 29, 2025 .Dt ZFS-SEND 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8 index 67f4d6eba171..08daf09d05f8 100644 --- a/sys/contrib/openzfs/man/man8/zfs-set.8 +++ b/sys/contrib/openzfs/man/man8/zfs-set.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd April 20, 2024 +.Dd October 12, 2024 .Dt ZFS-SET 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-share.8 b/sys/contrib/openzfs/man/man8/zfs-share.8 index f7a09a189182..e9c32a44b0c7 100644 --- a/sys/contrib/openzfs/man/man8/zfs-share.8 +++ b/sys/contrib/openzfs/man/man8/zfs-share.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd May 17, 2021 +.Dd July 11, 2022 .Dt ZFS-SHARE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 b/sys/contrib/openzfs/man/man8/zfs-snapshot.8 index 3ddd1273c8e8..8f4b2c335f09 100644 --- a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 +++ b/sys/contrib/openzfs/man/man8/zfs-snapshot.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-SNAPSHOT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-upgrade.8 b/sys/contrib/openzfs/man/man8/zfs-upgrade.8 index bac74e37aef9..a5ce2b760da4 100644 --- a/sys/contrib/openzfs/man/man8/zfs-upgrade.8 +++ b/sys/contrib/openzfs/man/man8/zfs-upgrade.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd June 30, 2019 +.Dd July 11, 2022 .Dt ZFS-UPGRADE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-userspace.8 b/sys/contrib/openzfs/man/man8/zfs-userspace.8 index d7a4d18e83b1..c255d911740d 100644 --- a/sys/contrib/openzfs/man/man8/zfs-userspace.8 +++ b/sys/contrib/openzfs/man/man8/zfs-userspace.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd June 30, 2019 +.Dd July 11, 2022 .Dt ZFS-USERSPACE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-wait.8 b/sys/contrib/openzfs/man/man8/zfs-wait.8 index 554a67455c60..e5c60010d2f9 100644 --- a/sys/contrib/openzfs/man/man8/zfs-wait.8 +++ b/sys/contrib/openzfs/man/man8/zfs-wait.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 31, 2021 +.Dd July 11, 2022 .Dt ZFS-WAIT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-zone.8 b/sys/contrib/openzfs/man/man8/zfs-zone.8 index 7ad0ac89463c..a56a304e82b2 100644 --- a/sys/contrib/openzfs/man/man8/zfs-zone.8 +++ b/sys/contrib/openzfs/man/man8/zfs-zone.8 @@ -38,7 +38,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright 2021 Klara, Inc. .\" -.Dd June 3, 2022 +.Dd July 11, 2022 .Dt ZFS-ZONE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs.8 b/sys/contrib/openzfs/man/man8/zfs.8 index e16a3a82b672..b7566a727469 100644 --- a/sys/contrib/openzfs/man/man8/zfs.8 +++ b/sys/contrib/openzfs/man/man8/zfs.8 @@ -37,7 +37,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd April 18, 2025 +.Dd May 12, 2025 .Dt ZFS 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 index eef0ce68f17b..465e336d170c 100644 --- a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 +++ b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd April 17, 2020 +.Dd July 11, 2022 .Dt ZFS_IDS_TO_PATH 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zgenhostid.8 b/sys/contrib/openzfs/man/man8/zgenhostid.8 index 2b5b4fc18216..ff564880f97d 100644 --- a/sys/contrib/openzfs/man/man8/zgenhostid.8 +++ b/sys/contrib/openzfs/man/man8/zgenhostid.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC. .\" -.Dd May 26, 2021 +.Dd July 11, 2022 .Dt ZGENHOSTID 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zinject.8 b/sys/contrib/openzfs/man/man8/zinject.8 index 1d9e43aed5ec..704f6a7accd8 100644 --- a/sys/contrib/openzfs/man/man8/zinject.8 +++ b/sys/contrib/openzfs/man/man8/zinject.8 @@ -138,6 +138,20 @@ This injector is automatically cleared after the import is finished. . .It Xo .Nm zinject +.Fl E Ar delay +.Op Fl a +.Op Fl m +.Op Fl f Ar freq +.Op Fl l Ar level +.Op Fl r Ar range +.Op Fl T Ar iotype +.Op Fl t Ar type Ns | Ns Fl b Ar bookmark +.Xc +Inject pipeline ready stage delays for the given object or bookmark. +The delay is specified in milliseconds. +. +.It Xo +.Nm zinject .Fl I .Op Fl s Ar seconds Ns | Ns Fl g Ar txgs .Ar pool diff --git a/sys/contrib/openzfs/man/man8/zpool-attach.8 b/sys/contrib/openzfs/man/man8/zpool-attach.8 index 51d876767666..04996ed4fa11 100644 --- a/sys/contrib/openzfs/man/man8/zpool-attach.8 +++ b/sys/contrib/openzfs/man/man8/zpool-attach.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd June 28, 2023 +.Dd November 8, 2023 .Dt ZPOOL-ATTACH 8 .Os . @@ -39,24 +39,24 @@ .Cm attach .Op Fl fsw .Oo Fl o Ar property Ns = Ns Ar value Oc -.Ar pool device new_device +.Ar pool vdev new_device . .Sh DESCRIPTION Attaches .Ar new_device to the existing -.Ar device . +.Ar vdev . The behavior differs depending on if the existing -.Ar device +.Ar vdev is a RAID-Z device, or a mirror/plain device. .Pp -If the existing device is a mirror or plain device +If the existing vdev is a mirror or plain device .Pq e.g. specified as Qo Li sda Qc or Qq Li mirror-7 , -the new device will be mirrored with the existing device, a resilver will be +the new device will be mirrored with the existing vdev, a resilver will be initiated, and the new device will contribute to additional redundancy once the resilver completes. If -.Ar device +.Ar vdev is not currently part of a mirrored configuration, .Ar device automatically transforms into a two-way mirror of @@ -64,7 +64,7 @@ automatically transforms into a two-way mirror of and .Ar new_device . If -.Ar device +.Ar vdev is part of a two-way mirror, attaching .Ar new_device creates a three-way mirror, and so on. @@ -72,7 +72,7 @@ In either case, .Ar new_device begins to resilver immediately and any running scrub is canceled. .Pp -If the existing device is a RAID-Z device +If the existing vdev is a RAID-Z device .Pq e.g. specified as Qq Ar raidz2-0 , the new device will become part of that RAID-Z group. A "raidz expansion" will be initiated, and once the expansion completes, @@ -112,7 +112,7 @@ the checksums of all blocks which have been copied during the expansion. Forces use of .Ar new_device , even if it appears to be in use. -Not all devices can be overridden in this manner. +Not all vdevs can be overridden in this manner. .It Fl o Ar property Ns = Ns Ar value Sets the given pool properties. See the @@ -121,7 +121,7 @@ manual page for a list of valid properties that can be set. The only property supported at the moment is .Sy ashift . .It Fl s -When attaching to a mirror or plain device, the +When attaching to a mirror or plain vdev, the .Ar new_device is reconstructed sequentially to restore redundancy as quickly as possible. Checksums are not verified during sequential reconstruction so a scrub is diff --git a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 index d97d10d5df6e..b654f669cfa2 100644 --- a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 +++ b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZPOOL-CHECKPOINT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-clear.8 b/sys/contrib/openzfs/man/man8/zpool-clear.8 index 19cd4be36408..70cd8325bd0e 100644 --- a/sys/contrib/openzfs/man/man8/zpool-clear.8 +++ b/sys/contrib/openzfs/man/man8/zpool-clear.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd April 29, 2024 .Dt ZPOOL-CLEAR 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8 index 490c67629a20..a36ae260a158 100644 --- a/sys/contrib/openzfs/man/man8/zpool-create.8 +++ b/sys/contrib/openzfs/man/man8/zpool-create.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-CREATE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-destroy.8 b/sys/contrib/openzfs/man/man8/zpool-destroy.8 index f49f29804ad7..82f3f3e203d6 100644 --- a/sys/contrib/openzfs/man/man8/zpool-destroy.8 +++ b/sys/contrib/openzfs/man/man8/zpool-destroy.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-DESTROY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-detach.8 b/sys/contrib/openzfs/man/man8/zpool-detach.8 index ae02dbc2d5b8..79a44310110d 100644 --- a/sys/contrib/openzfs/man/man8/zpool-detach.8 +++ b/sys/contrib/openzfs/man/man8/zpool-detach.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd July 11, 2022 .Dt ZPOOL-DETACH 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8 index 2d32dce2bb65..36a9864dc73b 100644 --- a/sys/contrib/openzfs/man/man8/zpool-events.8 +++ b/sys/contrib/openzfs/man/man8/zpool-events.8 @@ -190,6 +190,16 @@ Issued when a scrub is resumed on a pool. .It Sy scrub.paused Issued when a scrub is paused on a pool. .It Sy bootfs.vdev.attach +.It Sy sitout +Issued when a +.Sy RAIDZ +or +.Sy DRAID +vdev triggers the +.Sy autosit +logic. +This logic detects when a disk in such a vdev is significantly slower than its +peers, and sits them out temporarily to preserve the performance of the pool. .El . .Sh PAYLOADS diff --git a/sys/contrib/openzfs/man/man8/zpool-export.8 b/sys/contrib/openzfs/man/man8/zpool-export.8 index 171a7541c6d2..02495c088f94 100644 --- a/sys/contrib/openzfs/man/man8/zpool-export.8 +++ b/sys/contrib/openzfs/man/man8/zpool-export.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-EXPORT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-get.8 b/sys/contrib/openzfs/man/man8/zpool-get.8 index 1d6d1f08afa6..bfe1bae7619f 100644 --- a/sys/contrib/openzfs/man/man8/zpool-get.8 +++ b/sys/contrib/openzfs/man/man8/zpool-get.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd October 12, 2024 .Dt ZPOOL-GET 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-history.8 b/sys/contrib/openzfs/man/man8/zpool-history.8 index f15086eabc47..f02168951ff2 100644 --- a/sys/contrib/openzfs/man/man8/zpool-history.8 +++ b/sys/contrib/openzfs/man/man8/zpool-history.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd July 11, 2022 .Dt ZPOOL-HISTORY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-import.8 b/sys/contrib/openzfs/man/man8/zpool-import.8 index 9076f5c34929..c6d5f222b6b2 100644 --- a/sys/contrib/openzfs/man/man8/zpool-import.8 +++ b/sys/contrib/openzfs/man/man8/zpool-import.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-IMPORT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-initialize.8 b/sys/contrib/openzfs/man/man8/zpool-initialize.8 index 39579a58010e..5299a897cb97 100644 --- a/sys/contrib/openzfs/man/man8/zpool-initialize.8 +++ b/sys/contrib/openzfs/man/man8/zpool-initialize.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP. .\" -.Dd May 27, 2021 +.Dd July 30, 2025 .Dt ZPOOL-INITIALIZE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-iostat.8 b/sys/contrib/openzfs/man/man8/zpool-iostat.8 index d8c21d0cfc6c..5dd9c9d55e20 100644 --- a/sys/contrib/openzfs/man/man8/zpool-iostat.8 +++ b/sys/contrib/openzfs/man/man8/zpool-iostat.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd January 29, 2024 .Dt ZPOOL-IOSTAT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 b/sys/contrib/openzfs/man/man8/zpool-labelclear.8 index ba3d1509aa75..b807acaaede3 100644 --- a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 +++ b/sys/contrib/openzfs/man/man8/zpool-labelclear.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 31, 2021 +.Dd July 11, 2022 .Dt ZPOOL-LABELCLEAR 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8 index b720e203c1c9..106399941f98 100644 --- a/sys/contrib/openzfs/man/man8/zpool-list.8 +++ b/sys/contrib/openzfs/man/man8/zpool-list.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd October 12, 2024 .Dt ZPOOL-LIST 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-offline.8 b/sys/contrib/openzfs/man/man8/zpool-offline.8 index 49b1f34ad5d5..388c7634acce 100644 --- a/sys/contrib/openzfs/man/man8/zpool-offline.8 +++ b/sys/contrib/openzfs/man/man8/zpool-offline.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd December 21, 2023 .Dt ZPOOL-OFFLINE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-reguid.8 b/sys/contrib/openzfs/man/man8/zpool-reguid.8 index 77101fc07326..b98c88e320de 100644 --- a/sys/contrib/openzfs/man/man8/zpool-reguid.8 +++ b/sys/contrib/openzfs/man/man8/zpool-reguid.8 @@ -29,7 +29,7 @@ .\" Copyright (c) 2024, Klara Inc. .\" Copyright (c) 2024, Mateusz Piotrowski .\" -.Dd June 21, 2023 +.Dd August 26, 2024 .Dt ZPOOL-REGUID 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-remove.8 b/sys/contrib/openzfs/man/man8/zpool-remove.8 index d10a92e49bbe..4d5fc431d332 100644 --- a/sys/contrib/openzfs/man/man8/zpool-remove.8 +++ b/sys/contrib/openzfs/man/man8/zpool-remove.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd November 19, 2024 .Dt ZPOOL-REMOVE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-reopen.8 b/sys/contrib/openzfs/man/man8/zpool-reopen.8 index 594cff3d16d8..c4e10f0a546e 100644 --- a/sys/contrib/openzfs/man/man8/zpool-reopen.8 +++ b/sys/contrib/openzfs/man/man8/zpool-reopen.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd June 2, 2021 +.Dd July 11, 2022 .Dt ZPOOL-REOPEN 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-replace.8 b/sys/contrib/openzfs/man/man8/zpool-replace.8 index 9f3156eeb3ef..651af13b19b8 100644 --- a/sys/contrib/openzfs/man/man8/zpool-replace.8 +++ b/sys/contrib/openzfs/man/man8/zpool-replace.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 29, 2021 +.Dd July 11, 2022 .Dt ZPOOL-REPLACE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-resilver.8 b/sys/contrib/openzfs/man/man8/zpool-resilver.8 index 2161d77f62ed..59c4be5db209 100644 --- a/sys/contrib/openzfs/man/man8/zpool-resilver.8 +++ b/sys/contrib/openzfs/man/man8/zpool-resilver.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZPOOL-RESILVER 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8 index 0ecf8bd3851f..cf7ead5788bf 100644 --- a/sys/contrib/openzfs/man/man8/zpool-scrub.8 +++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP. .\" -.Dd December 11, 2024 +.Dd August 6, 2025 .Dt ZPOOL-SCRUB 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-split.8 b/sys/contrib/openzfs/man/man8/zpool-split.8 index a67c865cf30c..ee4c6384cf23 100644 --- a/sys/contrib/openzfs/man/man8/zpool-split.8 +++ b/sys/contrib/openzfs/man/man8/zpool-split.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd June 2, 2021 +.Dd July 11, 2022 .Dt ZPOOL-SPLIT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8 index a7f3e088043b..108a1067b384 100644 --- a/sys/contrib/openzfs/man/man8/zpool-status.8 +++ b/sys/contrib/openzfs/man/man8/zpool-status.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd February 14, 2024 +.Dd May 20, 2025 .Dt ZPOOL-STATUS 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-sync.8 b/sys/contrib/openzfs/man/man8/zpool-sync.8 index 8f438f363e83..d1dc05d0c202 100644 --- a/sys/contrib/openzfs/man/man8/zpool-sync.8 +++ b/sys/contrib/openzfs/man/man8/zpool-sync.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd July 11, 2022 .Dt ZPOOL-SYNC 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-trim.8 b/sys/contrib/openzfs/man/man8/zpool-trim.8 index 18723e1be0d2..c4e849019789 100644 --- a/sys/contrib/openzfs/man/man8/zpool-trim.8 +++ b/sys/contrib/openzfs/man/man8/zpool-trim.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP. .\" -.Dd May 27, 2021 +.Dd July 30, 2025 .Dt ZPOOL-TRIM 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-upgrade.8 b/sys/contrib/openzfs/man/man8/zpool-upgrade.8 index 20632ae4bba0..adae47f82eb1 100644 --- a/sys/contrib/openzfs/man/man8/zpool-upgrade.8 +++ b/sys/contrib/openzfs/man/man8/zpool-upgrade.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-UPGRADE 8 .Os . @@ -65,10 +65,10 @@ property). .Cm upgrade .Fl v .Xc -Displays legacy ZFS versions supported by the this version of ZFS. +Displays legacy ZFS versions supported by this version of ZFS. See .Xr zpool-features 7 -for a description of feature flags features supported by this version of ZFS. +for a description of features supported by this version of ZFS. .It Xo .Nm zpool .Cm upgrade diff --git a/sys/contrib/openzfs/man/man8/zpool-wait.8 b/sys/contrib/openzfs/man/man8/zpool-wait.8 index 0ffb4badfb7b..28a51d29a913 100644 --- a/sys/contrib/openzfs/man/man8/zpool-wait.8 +++ b/sys/contrib/openzfs/man/man8/zpool-wait.8 @@ -28,7 +28,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd January 29, 2024 .Dt ZPOOL-WAIT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8 index b96944050594..3bfef780b298 100644 --- a/sys/contrib/openzfs/man/man8/zpool.8 +++ b/sys/contrib/openzfs/man/man8/zpool.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd February 14, 2024 +.Dd November 19, 2024 .Dt ZPOOL 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zstream.8 b/sys/contrib/openzfs/man/man8/zstream.8 index 03a8479c9e6a..5b3d063bc4a5 100644 --- a/sys/contrib/openzfs/man/man8/zstream.8 +++ b/sys/contrib/openzfs/man/man8/zstream.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd October 4, 2022 +.Dd November 10, 2022 .Dt ZSTREAM 8 .Os . diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index 362d2295e091..58a80dc4402c 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -4,7 +4,7 @@ ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement ZFS_MODULE_CFLAGS += -Wmissing-prototypes -ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@ +ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@ ifneq ($(KBUILD_EXTMOD),) zfs_include = @abs_top_srcdir@/include diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c index 6d3bcca9f995..dcb0a391dda4 100644 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c @@ -38,11 +38,14 @@ kfpu_begin(); E(s, d, b); kfpu_end(); \ } +#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \ + defined(__PPC64__) /* some implementation is always okay */ static inline boolean_t sha2_is_supported(void) { return (B_TRUE); } +#endif #if defined(__x86_64) diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c index 2efd9fcf4c99..a85a71a83df4 100644 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c @@ -38,11 +38,14 @@ kfpu_begin(); E(s, d, b); kfpu_end(); \ } +#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__arm__) || defined(__PPC64__) /* some implementation is always okay */ static inline boolean_t sha2_is_supported(void) { return (B_TRUE); } +#endif #if defined(__x86_64) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c index b15a3e6e38c0..cb5787269db2 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c @@ -1175,7 +1175,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) int count = 0; zfs_acl_phys_t acl_phys; - if (zp->z_zfsvfs->z_replay == B_FALSE) { + if (ZTOV(zp) != NULL && zp->z_zfsvfs->z_replay == B_FALSE) { ASSERT_VOP_IN_SEQC(ZTOV(zp)); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c index a222c5de4a2a..4de48e013ec4 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -674,7 +674,6 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) zfs_uio_t uio; int *eofp = ap->a_eofflag; off_t dots_offset; - ssize_t orig_resid; int error; zfs_uio_init(&uio, ap->a_uio); @@ -694,11 +693,13 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) return (0); } - orig_resid = zfs_uio_resid(&uio); error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, &dots_offset); - if (error != 0) - goto err; + if (error != 0) { + if (error == ENAMETOOLONG) /* ran out of destination space */ + error = 0; + return (error); + } if (zfs_uio_offset(&uio) != dots_offset) return (SET_ERROR(EINVAL)); @@ -711,11 +712,8 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) entry.d_reclen = sizeof (entry); error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); if (error != 0) { -err: - if (error == ENAMETOOLONG) { - error = orig_resid == zfs_uio_resid(&uio) ? - EINVAL : 0; - } + if (error == ENAMETOOLONG) + error = 0; return (SET_ERROR(error)); } if (eofp != NULL) @@ -1060,21 +1058,17 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) zfs_uio_t uio; int *eofp = ap->a_eofflag; off_t dots_offset; - ssize_t orig_resid; int error; zfs_uio_init(&uio, ap->a_uio); - orig_resid = zfs_uio_resid(&uio); ASSERT3S(vp->v_type, ==, VDIR); error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, &uio, &dots_offset); if (error != 0) { - if (error == ENAMETOOLONG) { /* ran out of destination space */ - error = orig_resid == zfs_uio_resid(&uio) ? - EINVAL : 0; - } + if (error == ENAMETOOLONG) /* ran out of destination space */ + error = 0; return (error); } @@ -1092,13 +1086,9 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); if (error != 0) { if (error == ENOENT) { - if (orig_resid == zfs_uio_resid(&uio)) { - error = EINVAL; - } else { - error = 0; - if (eofp != NULL) - *eofp = 1; - } + if (eofp != NULL) + *eofp = 1; + error = 0; } zfs_exit(zfsvfs, FTAG); return (error); @@ -1111,10 +1101,8 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) entry.d_reclen = sizeof (entry); error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); if (error != 0) { - if (error == ENAMETOOLONG) { - error = orig_resid == zfs_uio_resid(&uio) ? - EINVAL : 0; - } + if (error == ENAMETOOLONG) + error = 0; zfs_exit(zfsvfs, FTAG); return (SET_ERROR(error)); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c index 21e5f7938f9f..ca13569a1235 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c @@ -164,8 +164,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, - ssize_t *resid) + uint8_t ashift, ssize_t *resid) { + (void) ashift; return (zfs_file_write_impl(fp, buf, count, &off, resid)); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 174141a5deab..411225786089 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -61,6 +61,7 @@ #include <sys/fs/zfs.h> #include <sys/dmu.h> #include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> #include <sys/spa.h> #include <sys/txg.h> #include <sys/dbuf.h> @@ -388,7 +389,9 @@ zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred, error = vn_lock(vp, LK_EXCLUSIVE); if (error) return (error); + vn_seqc_write_begin(vp); error = zfs_ioctl_setxattr(vp, fsx, cred); + vn_seqc_write_end(vp); VOP_UNLOCK(vp); return (error); } @@ -1695,7 +1698,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, objset_t *os; caddr_t outbuf; size_t bufsize; - ssize_t orig_resid; zap_cursor_t zc; zap_attribute_t *zap; uint_t bytes_wanted; @@ -1744,7 +1746,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, error = 0; os = zfsvfs->z_os; offset = zfs_uio_offset(uio); - orig_resid = zfs_uio_resid(uio); prefetch = zp->z_zn_prefetch; zap = zap_attribute_long_alloc(); @@ -1924,7 +1925,7 @@ update: kmem_free(outbuf, bufsize); if (error == ENOENT) - error = orig_resid == zfs_uio_resid(uio) ? EINVAL : 0; + error = 0; ZFS_ACCESSTIME_STAMP(zfsvfs, zp); @@ -2205,6 +2206,7 @@ zfs_setattr_dir(znode_t *dzp) if (err) break; + vn_seqc_write_begin(ZTOV(zp)); mutex_enter(&dzp->z_lock); if (zp->z_uid != dzp->z_uid) { @@ -2254,6 +2256,7 @@ sa_add_projid_err: dmu_tx_abort(tx); } tx = NULL; + vn_seqc_write_end(ZTOV(zp)); if (err != 0 && err != ENOENT) break; @@ -5729,6 +5732,9 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap) { ulong_t val; int error; +#ifdef _PC_CLONE_BLKSIZE + zfsvfs_t *zfsvfs; +#endif error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); @@ -5775,6 +5781,21 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = 1; return (0); #endif +#ifdef _PC_CLONE_BLKSIZE + case _PC_CLONE_BLKSIZE: + zfsvfs = (zfsvfs_t *)ap->a_vp->v_mount->mnt_data; + if (zfs_bclone_enabled && + spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os), + SPA_FEATURE_BLOCK_CLONING)) + *ap->a_retval = dsl_dataset_feature_is_active( + zfsvfs->z_os->os_dsl_dataset, + SPA_FEATURE_LARGE_BLOCKS) ? + SPA_MAXBLOCKSIZE : + SPA_OLD_MAXBLOCKSIZE; + else + *ap->a_retval = 0; + return (0); +#endif default: return (vop_stdpathconf(ap)); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c index 7cd0a153577c..649022ab5bcb 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c @@ -817,6 +817,10 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, (*zpp)->z_dnodesize = dnodesize; (*zpp)->z_projid = projid; + vnode_t *vp = ZTOV(*zpp); + if (!(flag & IS_ROOT_NODE)) + vn_seqc_write_begin(vp); + if (vap->va_mask & AT_XVATTR) zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); @@ -825,7 +829,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); } if (!(flag & IS_ROOT_NODE)) { - vnode_t *vp = ZTOV(*zpp); + vn_seqc_write_end(vp); vp->v_vflag |= VV_FORCEINSMQ; int err = insmntque(vp, zfsvfs->z_vfs); vp->v_vflag &= ~VV_FORCEINSMQ; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 830fad7fe793..1bd3500e9f66 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -471,13 +471,17 @@ vdev_disk_close(vdev_t *v) if (v->vdev_reopening || vd == NULL) return; + rw_enter(&vd->vd_lock, RW_WRITER); + if (vd->vd_bdh != NULL) vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa), zfs_vdev_holder); + v->vdev_tsd = NULL; + + rw_exit(&vd->vd_lock); rw_destroy(&vd->vd_lock); kmem_free(vd, sizeof (vdev_disk_t)); - v->vdev_tsd = NULL; } /* diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c index c729947369c2..3fdcdbac6f68 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c @@ -115,8 +115,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) */ int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, - ssize_t *resid) + uint8_t ashift, ssize_t *resid) { + (void) ashift; ssize_t rc; rc = kernel_write(fp, buf, count, &off); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index cd606e667bff..8a7d14ab6119 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -1556,6 +1556,12 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; +#ifdef HAVE_SET_DEFAULT_D_OP + set_default_d_op(sb, &zpl_dentry_operations); +#else + sb->s_d_op = &zpl_dentry_operations; +#endif + /* Set features for file system. */ zfs_set_fuid_feature(zfsvfs); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c index 48dae79a2373..81ac26cb0c93 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c @@ -202,7 +202,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) return (!!dentry->d_inode); } -static dentry_operations_t zpl_dops_snapdirs = { +static const struct dentry_operations zpl_dops_snapdirs = { /* * Auto mounting of snapshots is only supported for 2.6.37 and * newer kernels. Prior to this kernel the ops->follow_link() @@ -215,6 +215,51 @@ static dentry_operations_t zpl_dops_snapdirs = { .d_revalidate = zpl_snapdir_revalidate, }; +/* + * For the .zfs control directory to work properly we must be able to override + * the default operations table and register custom .d_automount and + * .d_revalidate callbacks. + */ +static void +set_snapdir_dentry_ops(struct dentry *dentry, unsigned int extraflags) { + static const unsigned int op_flags = + DCACHE_OP_HASH | DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE | + DCACHE_OP_PRUNE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_REAL; + +#ifdef HAVE_D_SET_D_OP + /* + * d_set_d_op() will set the DCACHE_OP_ flags according to what it + * finds in the passed dentry_operations, so we don't have to. + * + * We clear the flags and the old op table before calling d_set_d_op() + * because issues a warning when the dentry operations table is already + * set. + */ + dentry->d_op = NULL; + dentry->d_flags &= ~op_flags; + d_set_d_op(dentry, &zpl_dops_snapdirs); + dentry->d_flags |= extraflags; +#else + /* + * Since 6.17 there's no exported way to modify dentry ops, so we have + * to reach in and do it ourselves. This should be safe for our very + * narrow use case, which is to create or splice in an entry to give + * access to a snapshot. + * + * We need to set the op flags directly. We hardcode + * DCACHE_OP_REVALIDATE because that's the only operation we have; if + * we ever extend zpl_dops_snapdirs we will need to update the op flags + * to match. + */ + spin_lock(&dentry->d_lock); + dentry->d_op = &zpl_dops_snapdirs; + dentry->d_flags &= ~op_flags; + dentry->d_flags |= DCACHE_OP_REVALIDATE | extraflags; + spin_unlock(&dentry->d_lock); +#endif +} + static struct dentry * zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) @@ -236,10 +281,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, return (ERR_PTR(error)); ASSERT(error == 0 || ip == NULL); - d_clear_d_op(dentry); - d_set_d_op(dentry, &zpl_dops_snapdirs); - dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - + set_snapdir_dentry_ops(dentry, DCACHE_NEED_AUTOMOUNT); return (d_splice_alias(ip, dentry)); } @@ -373,8 +415,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0); if (error == 0) { - d_clear_d_op(dentry); - d_set_d_op(dentry, &zpl_dops_snapdirs); + set_snapdir_dentry_ops(dentry, 0); d_instantiate(dentry, ip); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c index 53819628627d..444948d03cb3 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2023, Datto Inc. All rights reserved. + * Copyright (c) 2025, Klara, Inc. */ @@ -33,6 +34,20 @@ #include <linux/iversion.h> #include <linux/version.h> +/* + * What to do when the last reference to an inode is released. If 0, the kernel + * will cache it on the superblock. If 1, the inode will be freed immediately. + * See zpl_drop_inode(). + */ +int zfs_delete_inode = 0; + +/* + * What to do when the last reference to a dentry is released. If 0, the kernel + * will cache it until the entry (file) is destroyed. If 1, the dentry will be + * marked for cleanup, at which time its inode reference will be released. See + * zpl_dentry_delete(). + */ +int zfs_delete_dentry = 0; static struct inode * zpl_inode_alloc(struct super_block *sb) @@ -77,11 +92,36 @@ zpl_dirty_inode(struct inode *ip, int flags) } /* - * When ->drop_inode() is called its return value indicates if the - * inode should be evicted from the inode cache. If the inode is - * unhashed and has no links the default policy is to evict it - * immediately. + * ->drop_inode() is called when the last reference to an inode is released. + * Its return value indicates if the inode should be destroyed immediately, or + * cached on the superblock structure. + * + * By default (zfs_delete_inode=0), we call generic_drop_inode(), which returns + * "destroy immediately" if the inode is unhashed and has no links (roughly: no + * longer exists on disk). On datasets with millions of rarely-accessed files, + * this can cause a large amount of memory to be "pinned" by cached inodes, + * which in turn pin their associated dnodes and dbufs, until the kernel starts + * reporting memory pressure and requests OpenZFS release some memory (see + * zfs_prune()). + * + * When set to 1, we call generic_delete_node(), which always returns "destroy + * immediately", resulting in inodes being destroyed immediately, releasing + * their associated dnodes and dbufs to the dbuf cached and the ARC to be + * evicted as normal. * + * Note that the "last reference" doesn't always mean the last _userspace_ + * reference; the dentry cache also holds a reference, so "busy" inodes will + * still be kept alive that way (subject to dcache tuning). + */ +static int +zpl_drop_inode(struct inode *ip) +{ + if (zfs_delete_inode) + return (generic_delete_inode(ip)); + return (generic_drop_inode(ip)); +} + +/* * The ->evict_inode() callback must minimally truncate the inode pages, * and call clear_inode(). For 2.6.35 and later kernels this will * simply update the inode state, with the sync occurring before the @@ -470,6 +510,7 @@ const struct super_operations zpl_super_operations = { .destroy_inode = zpl_inode_destroy, .dirty_inode = zpl_dirty_inode, .write_inode = NULL, + .drop_inode = zpl_drop_inode, .evict_inode = zpl_evict_inode, .put_super = zpl_put_super, .sync_fs = zpl_sync_fs, @@ -480,6 +521,35 @@ const struct super_operations zpl_super_operations = { .show_stats = NULL, }; +/* + * ->d_delete() is called when the last reference to a dentry is released. Its + * return value indicates if the dentry should be destroyed immediately, or + * retained in the dentry cache. + * + * By default (zfs_delete_dentry=0) the kernel will always cache unused + * entries. Each dentry holds an inode reference, so cached dentries can hold + * the final inode reference indefinitely, leading to the inode and its related + * data being pinned (see zpl_drop_inode()). + * + * When set to 1, we signal that the dentry should be destroyed immediately and + * never cached. This reduces memory usage, at the cost of higher overheads to + * lookup a file, as the inode and its underlying data (dnode/dbuf) need to be + * reloaded and reinflated. + * + * Note that userspace does not have direct control over dentry references and + * reclaim; rather, this is part of the kernel's caching and reclaim subsystems + * (eg vm.vfs_cache_pressure). + */ +static int +zpl_dentry_delete(const struct dentry *dentry) +{ + return (zfs_delete_dentry ? 1 : 0); +} + +const struct dentry_operations zpl_dentry_operations = { + .d_delete = zpl_dentry_delete, +}; + struct file_system_type zpl_fs_type = { .owner = THIS_MODULE, .name = ZFS_DRIVER, @@ -491,3 +561,10 @@ struct file_system_type zpl_fs_type = { .mount = zpl_mount, .kill_sb = zpl_kill_sb, }; + +ZFS_MODULE_PARAM(zfs, zfs_, delete_inode, INT, ZMOD_RW, + "Delete inodes as soon as the last reference is released."); + +ZFS_MODULE_PARAM(zfs, zfs_, delete_dentry, INT, ZMOD_RW, + "Delete dentries from dentry cache as soon as the last reference is " + "released."); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index bac166fcd89e..967a018640e1 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -484,7 +484,28 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, fstrans_cookie_t cookie = spl_fstrans_mark(); uint64_t offset = io_offset(bio, rq); uint64_t size = io_size(bio, rq); - int rw = io_data_dir(bio, rq); + int rw; + + if (rq != NULL) { + /* + * Flush & trim requests go down the zvol_write codepath. Or + * more specifically: + * + * If request is a write, or if it's op_is_sync() and not a + * read, or if it's a flush, or if it's a discard, then send the + * request down the write path. + */ + if (op_is_write(rq->cmd_flags) || + (op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) || + req_op(rq) == REQ_OP_FLUSH || + op_is_discard(rq->cmd_flags)) { + rw = WRITE; + } else { + rw = READ; + } + } else { + rw = bio_data_dir(bio); + } if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { zvol_end_io(bio, rq, SET_ERROR(ENXIO)); diff --git a/sys/contrib/openzfs/module/zcommon/zfs_deleg.c b/sys/contrib/openzfs/module/zcommon/zfs_deleg.c index 49bb534ca26c..87596558c9a1 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_deleg.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_deleg.c @@ -59,6 +59,7 @@ const zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_SNAPSHOT}, {ZFS_DELEG_PERM_SHARE}, {ZFS_DELEG_PERM_SEND}, + {ZFS_DELEG_PERM_SEND_RAW}, {ZFS_DELEG_PERM_USERPROP}, {ZFS_DELEG_PERM_USERQUOTA}, {ZFS_DELEG_PERM_GROUPQUOTA}, diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c index 864e3898b365..9190ae0362ea 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c @@ -364,8 +364,8 @@ zfs_prop_init(void) static const zprop_index_t xattr_table[] = { { "off", ZFS_XATTR_OFF }, - { "on", ZFS_XATTR_SA }, { "sa", ZFS_XATTR_SA }, + { "on", ZFS_XATTR_SA }, { "dir", ZFS_XATTR_DIR }, { NULL } }; diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c index 04ae9f986d8f..07819ba2be8b 100644 --- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c +++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c @@ -467,9 +467,15 @@ vdev_prop_init(void) zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0, PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_SIT_OUT, "sit_out", 0, + PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "SIT_OUT", boolean_table, + sfeatures); zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0, PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_AUTOSIT, "autosit", 0, + PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "AUTOSIT", boolean_table, + sfeatures); /* default index properties */ zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index df41e3b49204..b677f90280d7 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -1392,6 +1392,7 @@ arc_get_complevel(arc_buf_t *buf) return (buf->b_hdr->b_complevel); } +__maybe_unused static inline boolean_t arc_buf_is_shared(arc_buf_t *buf) { diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 7403f10d91b7..fccc4c5b5b94 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -2270,14 +2270,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) if (dn->dn_objset->os_dsl_dataset != NULL) rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG); #endif - /* - * We make this assert for private objects as well, but after we - * check if we're already dirty. They are allowed to re-dirty - * in syncing context. - */ - ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || - dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); mutex_enter(&db->db_mtx); /* @@ -2289,12 +2281,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_state == DB_CACHED || db->db_state == DB_FILL || db->db_state == DB_NOFILL); - mutex_enter(&dn->dn_mtx); - dnode_set_dirtyctx(dn, tx, db); - if (tx->tx_txg > dn->dn_dirty_txg) - dn->dn_dirty_txg = tx->tx_txg; - mutex_exit(&dn->dn_mtx); - if (db->db_blkid == DMU_SPILL_BLKID) dn->dn_have_spill = B_TRUE; @@ -2313,13 +2299,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (dr_next); } - /* - * Only valid if not already dirty. - */ - ASSERT(dn->dn_object == 0 || - dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); - ASSERT3U(dn->dn_nlevels, >, db->db_level); /* diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c index d6658375f810..0dc9adc7fd4f 100644 --- a/sys/contrib/openzfs/module/zfs/ddt.c +++ b/sys/contrib/openzfs/module/zfs/ddt.c @@ -1701,9 +1701,11 @@ ddt_load(spa_t *spa) } } - error = ddt_log_load(ddt); - if (error != 0 && error != ENOENT) - return (error); + if (ddt->ddt_flags & DDT_FLAG_LOG) { + error = ddt_log_load(ddt); + if (error != 0 && error != ENOENT) + return (error); + } DDT_KSTAT_SET(ddt, dds_log_active_entries, avl_numnodes(&ddt->ddt_log_active->ddl_tree)); diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c index 3d30e244c1f7..c7a2426f3a77 100644 --- a/sys/contrib/openzfs/module/zfs/ddt_log.c +++ b/sys/contrib/openzfs/module/zfs/ddt_log.c @@ -176,11 +176,13 @@ ddt_log_update_stats(ddt_t *ddt) * that's reasonable to expect anyway. */ dmu_object_info_t doi; - uint64_t nblocks; - dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, &doi); - nblocks = doi.doi_physical_blocks_512; - dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, &doi); - nblocks += doi.doi_physical_blocks_512; + uint64_t nblocks = 0; + if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, + &doi) == 0) + nblocks += doi.doi_physical_blocks_512; + if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, + &doi) == 0) + nblocks += doi.doi_physical_blocks_512; ddt_object_t *ddo = &ddt->ddt_log_stats; ddo->ddo_count = @@ -243,6 +245,13 @@ ddt_log_alloc_entry(ddt_t *ddt) } static void +ddt_log_free_entry(ddt_t *ddt, ddt_log_entry_t *ddle) +{ + kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ? + ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle); +} + +static void ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe) { /* Create the log tree entry from a live or stored entry */ @@ -347,8 +356,7 @@ ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe) ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, ddlwe); avl_remove(&ddl->ddl_tree, ddle); - kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ? - ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle); + ddt_log_free_entry(ddt, ddle); return (B_TRUE); } @@ -365,8 +373,7 @@ ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk) ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &ddlwe); avl_remove(&ddl->ddl_tree, ddle); - kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ? - ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle); + ddt_log_free_entry(ddt, ddle); return (B_TRUE); } @@ -527,8 +534,7 @@ ddt_log_empty(ddt_t *ddt, ddt_log_t *ddl) IMPLY(ddt->ddt_version == UINT64_MAX, avl_is_empty(&ddl->ddl_tree)); while ((ddle = avl_destroy_nodes(&ddl->ddl_tree, &cookie)) != NULL) { - kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ? - ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle); + ddt_log_free_entry(ddt, ddle); } ASSERT(avl_is_empty(&ddl->ddl_tree)); } @@ -727,7 +733,7 @@ ddt_log_load(ddt_t *ddt) ddle = fe; fe = AVL_NEXT(fl, fe); avl_remove(fl, ddle); - + ddt_log_free_entry(ddt, ddle); ddle = ae; ae = AVL_NEXT(al, ae); } diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index f7f808d5b8f7..a7a5c89bdafb 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -759,6 +759,8 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset, */ uint8_t ibps = ibs - SPA_BLKPTRSHIFT; limit = P2ROUNDUP(dmu_prefetch_max, 1 << ibs) >> ibs; + if (limit == 0) + end2 = start2; do { level2++; start2 = P2ROUNDUP(start2, 1 << ibps) >> ibps; @@ -1689,8 +1691,8 @@ dmu_object_cached_size(objset_t *os, uint64_t object, dmu_object_info_from_dnode(dn, &doi); - for (uint64_t off = 0; off < doi.doi_max_offset; - off += dmu_prefetch_max) { + for (uint64_t off = 0; off < doi.doi_max_offset && + dmu_prefetch_max > 0; off += dmu_prefetch_max) { /* dbuf_read doesn't prefetch L1 blocks. */ dmu_prefetch_by_dnode(dn, 1, off, dmu_prefetch_max, ZIO_PRIORITY_SYNC_READ); diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index a77f338bdfd3..8e6b569c2100 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -2037,6 +2037,8 @@ userquota_updates_task(void *arg) dn->dn_id_flags |= DN_ID_CHKED_BONUS; } dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); + ASSERT3U(dn->dn_dirtycnt, >, 0); + dn->dn_dirtycnt--; mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); @@ -2070,6 +2072,10 @@ dnode_rele_task(void *arg) dnode_t *dn; while ((dn = multilist_sublist_head(list)) != NULL) { + mutex_enter(&dn->dn_mtx); + ASSERT3U(dn->dn_dirtycnt, >, 0); + dn->dn_dirtycnt--; + mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); dnode_rele(dn, &os->os_synced_dnodes); } diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 963ff41232a3..e88d394b5229 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -173,9 +173,7 @@ dnode_cons(void *arg, void *unused, int kmflag) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; + dn->dn_dirtycnt = 0; dn->dn_bonus = NULL; dn->dn_have_spill = B_FALSE; dn->dn_zio = NULL; @@ -229,9 +227,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); - ASSERT0(dn->dn_dirty_txg); - ASSERT0(dn->dn_dirtyctx); - ASSERT0P(dn->dn_dirtyctx_firstset); + ASSERT0(dn->dn_dirtycnt); ASSERT0P(dn->dn_bonus); ASSERT(!dn->dn_have_spill); ASSERT0P(dn->dn_zio); @@ -692,10 +688,8 @@ dnode_destroy(dnode_t *dn) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; if (dn->dn_bonus != NULL) { mutex_enter(&dn->dn_bonus->db_mtx); dbuf_destroy(dn->dn_bonus); @@ -800,11 +794,9 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dn->dn_bonuslen = bonuslen; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; - dn->dn_dirtyctx = 0; dn->dn_free_txg = 0; - dn->dn_dirtyctx_firstset = NULL; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; dn->dn_allocated_txg = tx->tx_txg; dn->dn_id_flags = 0; @@ -955,9 +947,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; - ndn->dn_dirty_txg = odn->dn_dirty_txg; - ndn->dn_dirtyctx = odn->dn_dirtyctx; - ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; + ndn->dn_dirtycnt = odn->dn_dirtycnt; ASSERT0(zfs_refcount_count(&odn->dn_tx_holds)); zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ASSERT(avl_is_empty(&ndn->dn_dbufs)); @@ -1020,9 +1010,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; - odn->dn_dirty_txg = 0; - odn->dn_dirtyctx = 0; - odn->dn_dirtyctx_firstset = NULL; + odn->dn_dirtycnt = 0; odn->dn_have_spill = B_FALSE; odn->dn_zio = NULL; odn->dn_oldused = 0; @@ -1273,8 +1261,8 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) } else if (DN_SLOT_IS_PTR(dn)) { mutex_enter(&dn->dn_mtx); boolean_t can_free = (dn->dn_type == DMU_OT_NONE && - zfs_refcount_is_zero(&dn->dn_holds) && - !DNODE_IS_DIRTY(dn)); + dn->dn_dirtycnt == 0 && + zfs_refcount_is_zero(&dn->dn_holds)); mutex_exit(&dn->dn_mtx); if (!can_free) @@ -1757,17 +1745,23 @@ dnode_hold(objset_t *os, uint64_t object, const void *tag, dnode_t **dnp) * reference on the dnode. Returns FALSE if unable to add a * new reference. */ +static boolean_t +dnode_add_ref_locked(dnode_t *dn, const void *tag) +{ + ASSERT(MUTEX_HELD(&dn->dn_mtx)); + if (zfs_refcount_is_zero(&dn->dn_holds)) + return (FALSE); + VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + return (TRUE); +} + boolean_t dnode_add_ref(dnode_t *dn, const void *tag) { mutex_enter(&dn->dn_mtx); - if (zfs_refcount_is_zero(&dn->dn_holds)) { - mutex_exit(&dn->dn_mtx); - return (FALSE); - } - VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + boolean_t r = dnode_add_ref_locked(dn, tag); mutex_exit(&dn->dn_mtx); - return (TRUE); + return (r); } void @@ -1830,31 +1824,20 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) } /* - * Checks if the dnode itself is dirty, or is carrying any uncommitted records. - * It is important to check both conditions, as some operations (eg appending - * to a file) can dirty both as a single logical unit, but they are not synced - * out atomically, so checking one and not the other can result in an object - * appearing to be clean mid-way through a commit. + * Test if the dnode is dirty, or carrying uncommitted records. * - * Do not change this lightly! If you get it wrong, dmu_offset_next() can - * detect a hole where there is really data, leading to silent corruption. + * dn_dirtycnt is the number of txgs this dnode is dirty on. It's incremented + * in dnode_setdirty() the first time the dnode is dirtied on a txg, and + * decremented in either dnode_rele_task() or userquota_updates_task() when the + * txg is synced out. */ boolean_t dnode_is_dirty(dnode_t *dn) { mutex_enter(&dn->dn_mtx); - - for (int i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i]) || - !list_is_empty(&dn->dn_dirty_records[i])) { - mutex_exit(&dn->dn_mtx); - return (B_TRUE); - } - } - + boolean_t dirty = (dn->dn_dirtycnt != 0); mutex_exit(&dn->dn_mtx); - - return (B_FALSE); + return (dirty); } void @@ -1916,7 +1899,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) * dnode will hang around after we finish processing its * children. */ - VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); + mutex_enter(&dn->dn_mtx); + VERIFY(dnode_add_ref_locked(dn, (void *)(uintptr_t)tx->tx_txg)); + dn->dn_dirtycnt++; + ASSERT3U(dn->dn_dirtycnt, <=, 3); + mutex_exit(&dn->dn_mtx); (void) dbuf_dirty(dn->dn_dbuf, tx); @@ -2221,32 +2208,6 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, mutex_exit(&dn->dn_dbufs_mtx); } -void -dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag) -{ - /* - * Don't set dirtyctx to SYNC if we're just modifying this as we - * initialize the objset. - */ - if (dn->dn_dirtyctx == DN_UNDIRTIED) { - dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; - - if (ds != NULL) { - rrw_enter(&ds->ds_bp_rwlock, RW_READER, tag); - } - if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) { - if (dmu_tx_is_syncing(tx)) - dn->dn_dirtyctx = DN_DIRTY_SYNC; - else - dn->dn_dirtyctx = DN_DIRTY_OPEN; - dn->dn_dirtyctx_firstset = tag; - } - if (ds != NULL) { - rrw_exit(&ds->ds_bp_rwlock, tag); - } - } -} - static void dnode_partial_zero(dnode_t *dn, uint64_t off, uint64_t blkoff, uint64_t len, dmu_tx_t *tx) @@ -2695,6 +2656,32 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, } /* + * Adjust *offset to the next (or previous) block byte offset at lvl. + * Returns FALSE if *offset would overflow or underflow. + */ +static boolean_t +dnode_next_block(dnode_t *dn, int flags, uint64_t *offset, int lvl) +{ + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + int span = lvl * epbs + dn->dn_datablkshift; + uint64_t blkid, maxblkid; + + if (span >= 8 * sizeof (uint64_t)) + return (B_FALSE); + + blkid = *offset >> span; + maxblkid = 1ULL << (8 * sizeof (*offset) - span); + if (!(flags & DNODE_FIND_BACKWARDS) && blkid + 1 < maxblkid) + *offset = (blkid + 1) << span; + else if ((flags & DNODE_FIND_BACKWARDS) && blkid > 0) + *offset = (blkid << span) - 1; + else + return (B_FALSE); + + return (B_TRUE); +} + +/* * Find the next hole, data, or sparse region at or after *offset. * The value 'blkfill' tells us how many items we expect to find * in an L0 data block; this value is 1 for normal objects, @@ -2721,7 +2708,7 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, int minlvl, uint64_t blkfill, uint64_t txg) { - uint64_t initial_offset = *offset; + uint64_t matched = *offset; int lvl, maxlvl; int error = 0; @@ -2745,16 +2732,36 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, maxlvl = dn->dn_phys->dn_nlevels; - for (lvl = minlvl; lvl <= maxlvl; lvl++) { + for (lvl = minlvl; lvl <= maxlvl; ) { error = dnode_next_offset_level(dn, flags, offset, lvl, blkfill, txg); - if (error != ESRCH) + if (error == 0 && lvl > minlvl) { + --lvl; + matched = *offset; + } else if (error == ESRCH && lvl < maxlvl && + dnode_next_block(dn, flags, &matched, lvl)) { + /* + * Continue search at next/prev offset in lvl+1 block. + * + * Usually we only search upwards at the start of the + * search as higher level blocks point at a matching + * minlvl block in most cases, but we backtrack if not. + * + * This can happen for txg > 0 searches if the block + * contains only BPs/dnodes freed at that txg. It also + * happens if we are still syncing out the tree, and + * some BP's at higher levels are not updated yet. + * + * We must adjust offset to avoid coming back to the + * same offset and getting stuck looping forever. This + * also deals with the case where offset is already at + * the beginning or end of the object. + */ + ++lvl; + *offset = matched; + } else { break; - } - - while (error == 0 && --lvl >= minlvl) { - error = dnode_next_offset_level(dn, - flags, offset, lvl, blkfill, txg); + } } /* @@ -2766,9 +2773,6 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, error = 0; } - if (error == 0 && (flags & DNODE_FIND_BACKWARDS ? - initial_offset < *offset : initial_offset > *offset)) - error = SET_ERROR(ESRCH); out: if (!(flags & DNODE_FIND_HAVELOCK)) rw_exit(&dn->dn_struct_rwlock); diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c index 7db72b9b04b0..fd46127b6068 100644 --- a/sys/contrib/openzfs/module/zfs/mmp.c +++ b/sys/contrib/openzfs/module/zfs/mmp.c @@ -446,7 +446,7 @@ mmp_write_uberblock(spa_t *spa) uint64_t offset; hrtime_t lock_acquire_time = gethrtime(); - spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER); + spa_config_enter_priority(spa, SCL_STATE, mmp_tag, RW_READER); lock_acquire_time = gethrtime() - lock_acquire_time; if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10)) zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns " diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c index ea2d2c7227c8..d73195f1a21f 100644 --- a/sys/contrib/openzfs/module/zfs/range_tree.c +++ b/sys/contrib/openzfs/module/zfs/range_tree.c @@ -585,7 +585,7 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size, * the size, since we do not support removing partial segments * of range trees with gaps. */ - zfs_zfs_rs_set_fill_raw(rs, rt, zfs_rs_get_end_raw(rs, rt) - + zfs_rs_set_fill_raw(rs, rt, zfs_rs_get_end_raw(rs, rt) - zfs_rs_get_start_raw(rs, rt)); zfs_range_tree_stat_incr(rt, &rs_tmp); diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c index cf28955b0c50..f615591e826b 100644 --- a/sys/contrib/openzfs/module/zfs/spa_config.c +++ b/sys/contrib/openzfs/module/zfs/spa_config.c @@ -372,6 +372,8 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg); fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)); fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata); + fnvlist_add_uint64(config, ZPOOL_CONFIG_MIN_ALLOC, spa->spa_min_alloc); + fnvlist_add_uint64(config, ZPOOL_CONFIG_MAX_ALLOC, spa->spa_max_alloc); if (spa->spa_comment != NULL) fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index dceafbc27556..0bead6d49666 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -251,11 +251,11 @@ spa_mode_t spa_mode_global = SPA_MODE_UNINIT; #ifdef ZFS_DEBUG /* - * Everything except dprintf, set_error, spa, and indirect_remap is on - * by default in debug builds. + * Everything except dprintf, set_error, indirect_remap, and raidz_reconstruct + * is on by default in debug builds. */ int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR | - ZFS_DEBUG_INDIRECT_REMAP); + ZFS_DEBUG_INDIRECT_REMAP | ZFS_DEBUG_RAIDZ_RECONSTRUCT); #else int zfs_flags = 0; #endif @@ -510,7 +510,7 @@ spa_config_tryenter(spa_t *spa, int locks, const void *tag, krw_t rw) static void spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw, - int mmp_flag) + int priority_flag) { (void) tag; int wlocks_held = 0; @@ -526,7 +526,7 @@ spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw, mutex_enter(&scl->scl_lock); if (rw == RW_READER) { while (scl->scl_writer || - (!mmp_flag && scl->scl_write_wanted)) { + (!priority_flag && scl->scl_write_wanted)) { cv_wait(&scl->scl_cv, &scl->scl_lock); } } else { @@ -551,7 +551,7 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) } /* - * The spa_config_enter_mmp() allows the mmp thread to cut in front of + * The spa_config_enter_priority() allows the mmp thread to cut in front of * outstanding write lock requests. This is needed since the mmp updates are * time sensitive and failure to service them promptly will result in a * suspended pool. This pool suspension has been seen in practice when there is @@ -560,7 +560,7 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) */ void -spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw) +spa_config_enter_priority(spa_t *spa, int locks, const void *tag, krw_t rw) { spa_config_enter_impl(spa, locks, tag, rw, 1); } @@ -806,6 +806,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_min_ashift = INT_MAX; spa->spa_max_ashift = 0; spa->spa_min_alloc = INT_MAX; + spa->spa_max_alloc = 0; spa->spa_gcd_alloc = INT_MAX; /* Reset cached value */ @@ -1865,6 +1866,19 @@ spa_get_worst_case_asize(spa_t *spa, uint64_t lsize) } /* + * Return the range of minimum allocation sizes for the normal allocation + * class. This can be used by external consumers of the DMU to estimate + * potential wasted capacity when setting the recordsize for an object. + * This is mainly for dRAID pools which always pad to a full stripe width. + */ +void +spa_get_min_alloc_range(spa_t *spa, uint64_t *min_alloc, uint64_t *max_alloc) +{ + *min_alloc = spa->spa_min_alloc; + *max_alloc = spa->spa_max_alloc; +} + +/* * Return the amount of slop space in bytes. It is typically 1/32 of the pool * (3.2%), minus the embedded log space. On very small pools, it may be * slightly larger than this. On very large pools, it will be capped to @@ -3085,6 +3099,7 @@ EXPORT_SYMBOL(spa_version); EXPORT_SYMBOL(spa_state); EXPORT_SYMBOL(spa_load_state); EXPORT_SYMBOL(spa_freeze_txg); +EXPORT_SYMBOL(spa_get_min_alloc_range); /* for Lustre */ EXPORT_SYMBOL(spa_get_dspace); EXPORT_SYMBOL(spa_update_dspace); EXPORT_SYMBOL(spa_deflate); diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 9cf35e379000..c8d7280387a2 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -29,7 +29,7 @@ * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, Datto Inc. All rights reserved. - * Copyright (c) 2021, Klara Inc. + * Copyright (c) 2021, 2025, Klara, Inc. * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP. */ @@ -1086,6 +1086,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, } } + if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops)) + vd->vdev_autosit = + vdev_prop_default_numeric(VDEV_PROP_AUTOSIT); + /* * Add ourselves to the parent's list of children. */ @@ -1187,6 +1191,9 @@ vdev_free(vdev_t *vd) spa_spare_remove(vd); if (vd->vdev_isl2cache) spa_l2cache_remove(vd); + if (vd->vdev_prev_histo) + kmem_free(vd->vdev_prev_histo, + sizeof (uint64_t) * VDEV_L_HISTO_BUCKETS); txg_list_destroy(&vd->vdev_ms_list); txg_list_destroy(&vd->vdev_dtl_list); @@ -1490,12 +1497,14 @@ vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc) { if (min_alloc < spa->spa_min_alloc) spa->spa_min_alloc = min_alloc; - if (spa->spa_gcd_alloc == INT_MAX) { + + if (min_alloc > spa->spa_max_alloc) + spa->spa_max_alloc = min_alloc; + + if (spa->spa_gcd_alloc == INT_MAX) spa->spa_gcd_alloc = min_alloc; - } else { - spa->spa_gcd_alloc = vdev_gcd(min_alloc, - spa->spa_gcd_alloc); - } + else + spa->spa_gcd_alloc = vdev_gcd(min_alloc, spa->spa_gcd_alloc); } void @@ -1553,8 +1562,7 @@ vdev_metaslab_group_create(vdev_t *vd) if (vd->vdev_ashift < spa->spa_min_ashift) spa->spa_min_ashift = vd->vdev_ashift; - uint64_t min_alloc = vdev_get_min_alloc(vd); - vdev_spa_set_alloc(spa, min_alloc); + vdev_spa_set_alloc(spa, vdev_get_min_alloc(vd)); } } } @@ -3857,6 +3865,26 @@ vdev_load(vdev_t *vd) } } + if (vd == vd->vdev_top && vd->vdev_top_zap != 0) { + spa_t *spa = vd->vdev_spa; + uint64_t autosit; + + error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap, + vdev_prop_to_name(VDEV_PROP_AUTOSIT), sizeof (autosit), + 1, &autosit); + if (error == 0) { + vd->vdev_autosit = autosit == 1; + } else if (error == ENOENT) { + vd->vdev_autosit = vdev_prop_default_numeric( + VDEV_PROP_AUTOSIT); + } else { + vdev_dbgmsg(vd, + "vdev_load: zap_lookup(top_zap=%llu) " + "failed [error=%d]", + (u_longlong_t)vd->vdev_top_zap, error); + } + } + /* * Load any rebuild state from the top-level vdev zap. */ @@ -4616,6 +4644,8 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_stat.vs_checksum_errors = 0; vd->vdev_stat.vs_dio_verify_errors = 0; vd->vdev_stat.vs_slow_ios = 0; + atomic_store_64(&vd->vdev_outlier_count, 0); + vd->vdev_read_sit_out_expire = 0; for (int c = 0; c < vd->vdev_children; c++) vdev_clear(spa, vd->vdev_child[c]); @@ -6107,6 +6137,56 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } vd->vdev_failfast = intval & 1; break; + case VDEV_PROP_SIT_OUT: + /* Only expose this for a draid or raidz leaf */ + if (!vd->vdev_ops->vdev_op_leaf || + vd->vdev_top == NULL || + (vd->vdev_top->vdev_ops != &vdev_raidz_ops && + vd->vdev_top->vdev_ops != &vdev_draid_ops)) { + error = ENOTSUP; + break; + } + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + if (intval == 1) { + vdev_t *ancestor = vd; + while (ancestor->vdev_parent != vd->vdev_top) + ancestor = ancestor->vdev_parent; + vdev_t *pvd = vd->vdev_top; + uint_t sitouts = 0; + for (int i = 0; i < pvd->vdev_children; i++) { + if (pvd->vdev_child[i] == ancestor) + continue; + if (vdev_sit_out_reads( + pvd->vdev_child[i], 0)) { + sitouts++; + } + } + if (sitouts >= vdev_get_nparity(pvd)) { + error = ZFS_ERR_TOO_MANY_SITOUTS; + break; + } + if (error == 0) + vdev_raidz_sit_child(vd, + INT64_MAX - gethrestime_sec()); + } else { + vdev_raidz_unsit_child(vd); + } + break; + case VDEV_PROP_AUTOSIT: + if (vd->vdev_ops != &vdev_raidz_ops && + vd->vdev_ops != &vdev_draid_ops) { + error = ENOTSUP; + break; + } + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_autosit = intval == 1; + break; case VDEV_PROP_CHECKSUM_N: if (nvpair_value_uint64(elem, &intval) != 0) { error = EINVAL; @@ -6456,6 +6536,19 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) ZPROP_SRC_NONE); } continue; + case VDEV_PROP_SIT_OUT: + /* Only expose this for a draid or raidz leaf */ + if (vd->vdev_ops->vdev_op_leaf && + vd->vdev_top != NULL && + (vd->vdev_top->vdev_ops == + &vdev_raidz_ops || + vd->vdev_top->vdev_ops == + &vdev_draid_ops)) { + vdev_prop_add_list(outnvl, propname, + NULL, vdev_sit_out_reads(vd, 0), + ZPROP_SRC_NONE); + } + continue; case VDEV_PROP_TRIM_SUPPORT: /* only valid for leaf vdevs */ if (vd->vdev_ops->vdev_op_leaf) { @@ -6506,6 +6599,29 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) vdev_prop_add_list(outnvl, propname, strval, intval, src); break; + case VDEV_PROP_AUTOSIT: + /* Only raidz vdevs cannot have this property */ + if (vd->vdev_ops != &vdev_raidz_ops && + vd->vdev_ops != &vdev_draid_ops) { + src = ZPROP_SRC_NONE; + intval = ZPROP_BOOLEAN_NA; + } else { + err = vdev_prop_get_int(vd, prop, + &intval); + if (err && err != ENOENT) + break; + + if (intval == + vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + else + src = ZPROP_SRC_LOCAL; + } + + vdev_prop_add_list(outnvl, propname, NULL, + intval, src); + break; + case VDEV_PROP_CHECKSUM_N: case VDEV_PROP_CHECKSUM_T: case VDEV_PROP_IO_N: diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c index a05289102af2..8588cfee3f7d 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_draid.c +++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2018 Intel Corporation. * Copyright (c) 2020 by Lawrence Livermore National Security, LLC. + * Copyright (c) 2025, Klara, Inc. */ #include <sys/zfs_context.h> @@ -1996,6 +1997,33 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr) rc->rc_allow_repair = 1; } } + + if (vdev_sit_out_reads(cvd, zio->io_flags)) { + rr->rr_outlier_cnt++; + ASSERT0(rc->rc_latency_outlier); + rc->rc_latency_outlier = 1; + } + } + + /* + * When the row contains a latency outlier and sufficient parity + * exists to reconstruct the column data, then skip reading the + * known slow child vdev as a performance optimization. + */ + if (rr->rr_outlier_cnt > 0 && + (rr->rr_firstdatacol - rr->rr_missingparity) >= + (rr->rr_missingdata + 1)) { + + for (int c = rr->rr_cols - 1; c >= rr->rr_firstdatacol; c--) { + raidz_col_t *rc = &rr->rr_col[c]; + + if (rc->rc_error == 0 && rc->rc_latency_outlier) { + rr->rr_missingdata++; + rc->rc_error = SET_ERROR(EAGAIN); + rc->rc_skipped = 1; + break; + } + } } /* diff --git a/sys/contrib/openzfs/module/zfs/vdev_file.c b/sys/contrib/openzfs/module/zfs/vdev_file.c index f457669bc809..20b4db65ec06 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_file.c +++ b/sys/contrib/openzfs/module/zfs/vdev_file.c @@ -228,7 +228,8 @@ vdev_file_io_strategy(void *arg) abd_return_buf_copy(zio->io_abd, buf, size); } else { buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); - err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid); + err = zfs_file_pwrite(vf->vf_file, buf, size, off, + vd->vdev_ashift, &resid); abd_return_buf(zio->io_abd, buf, size); } zio->io_error = err; diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c index c44f654b0261..0d4fdaa77ba0 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -511,6 +511,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, vd->vdev_asize); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_MIN_ALLOC, + vdev_get_min_alloc(vd)); fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog); if (vd->vdev_noalloc) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_NONALLOCATING, diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index b597d6daefde..56b8e3b60b22 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -24,6 +24,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2016 Gvozden Nešković. All rights reserved. + * Copyright (c) 2025, Klara, Inc. */ #include <sys/zfs_context.h> @@ -356,6 +357,32 @@ unsigned long raidz_expand_max_reflow_bytes = 0; uint_t raidz_expand_pause_point = 0; /* + * This represents the duration for a slow drive read sit out. + */ +static unsigned long vdev_read_sit_out_secs = 600; + +/* + * How often each RAID-Z and dRAID vdev will check for slow disk outliers. + * Increasing this interval will reduce the sensitivity of detection (since all + * I/Os since the last check are included in the statistics), but will slow the + * response to a disk developing a problem. + * + * Defaults to once per second; setting extremely small values may cause + * negative performance effects. + */ +static hrtime_t vdev_raidz_outlier_check_interval_ms = 1000; + +/* + * When performing slow outlier checks for RAID-Z and dRAID vdevs, this value is + * used to determine how far out an outlier must be before it counts as an event + * worth consdering. + * + * Smaller values will result in more aggressive sitting out of disks that may + * have problems, but may significantly increase the rate of spurious sit-outs. + */ +static uint32_t vdev_raidz_outlier_insensitivity = 50; + +/* * Maximum amount of copy io's outstanding at once. */ #ifdef _ILP32 @@ -2311,6 +2338,41 @@ vdev_raidz_min_asize(vdev_t *vd) vd->vdev_children); } +/* + * return B_TRUE if a read should be skipped due to being too slow. + * + * In vdev_child_slow_outlier() it looks for outliers based on disk + * latency from the most recent child reads. Here we're checking if, + * over time, a disk has has been an outlier too many times and is + * now in a sit out period. + */ +boolean_t +vdev_sit_out_reads(vdev_t *vd, zio_flag_t io_flags) +{ + if (vdev_read_sit_out_secs == 0) + return (B_FALSE); + + /* Avoid skipping a data column read when scrubbing */ + if (io_flags & ZIO_FLAG_SCRUB) + return (B_FALSE); + + if (!vd->vdev_ops->vdev_op_leaf) { + boolean_t sitting = B_FALSE; + for (int c = 0; c < vd->vdev_children; c++) { + sitting |= vdev_sit_out_reads(vd->vdev_child[c], + io_flags); + } + return (sitting); + } + + if (vd->vdev_read_sit_out_expire >= gethrestime_sec()) + return (B_TRUE); + + vd->vdev_read_sit_out_expire = 0; + + return (B_FALSE); +} + void vdev_raidz_child_done(zio_t *zio) { @@ -2475,6 +2537,45 @@ vdev_raidz_io_start_read_row(zio_t *zio, raidz_row_t *rr, boolean_t forceparity) rc->rc_skipped = 1; continue; } + + if (vdev_sit_out_reads(cvd, zio->io_flags)) { + rr->rr_outlier_cnt++; + ASSERT0(rc->rc_latency_outlier); + rc->rc_latency_outlier = 1; + } + } + + /* + * When the row contains a latency outlier and sufficient parity + * exists to reconstruct the column data, then skip reading the + * known slow child vdev as a performance optimization. + */ + if (rr->rr_outlier_cnt > 0 && + (rr->rr_firstdatacol - rr->rr_missingparity) >= + (rr->rr_missingdata + 1)) { + + for (int c = rr->rr_cols - 1; c >= 0; c--) { + raidz_col_t *rc = &rr->rr_col[c]; + + if (rc->rc_error == 0 && rc->rc_latency_outlier) { + if (c >= rr->rr_firstdatacol) + rr->rr_missingdata++; + else + rr->rr_missingparity++; + rc->rc_error = SET_ERROR(EAGAIN); + rc->rc_skipped = 1; + break; + } + } + } + + for (int c = rr->rr_cols - 1; c >= 0; c--) { + raidz_col_t *rc = &rr->rr_col[c]; + vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; + + if (rc->rc_error || rc->rc_size == 0) + continue; + if (forceparity || c >= rr->rr_firstdatacol || rr->rr_missingdata > 0 || (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) { @@ -2498,6 +2599,7 @@ vdev_raidz_io_start_read_phys_cols(zio_t *zio, raidz_map_t *rm) ASSERT3U(prc->rc_devidx, ==, i); vdev_t *cvd = vd->vdev_child[i]; + if (!vdev_readable(cvd)) { prc->rc_error = SET_ERROR(ENXIO); prc->rc_tried = 1; /* don't even try */ @@ -2774,6 +2876,239 @@ vdev_raidz_worst_error(raidz_row_t *rr) return (error); } +/* + * Find the median value from a set of n values + */ +static uint64_t +latency_median_value(const uint64_t *data, size_t n) +{ + uint64_t m; + + if (n % 2 == 0) + m = (data[(n >> 1) - 1] + data[n >> 1]) >> 1; + else + m = data[((n + 1) >> 1) - 1]; + + return (m); +} + +/* + * Calculate the outlier fence from a set of n latency values + * + * fence = Q3 + vdev_raidz_outlier_insensitivity x (Q3 - Q1) + */ +static uint64_t +latency_quartiles_fence(const uint64_t *data, size_t n, uint64_t *iqr) +{ + uint64_t q1 = latency_median_value(&data[0], n >> 1); + uint64_t q3 = latency_median_value(&data[(n + 1) >> 1], n >> 1); + + /* + * To avoid detecting false positive outliers when N is small and + * and the latencies values are very close, make sure the IQR + * is at least 25% larger than Q1. + */ + *iqr = MAX(q3 - q1, q1 / 4); + + return (q3 + (*iqr * vdev_raidz_outlier_insensitivity)); +} +#define LAT_CHILDREN_MIN 5 +#define LAT_OUTLIER_LIMIT 20 + +static int +latency_compare(const void *arg1, const void *arg2) +{ + const uint64_t *l1 = (uint64_t *)arg1; + const uint64_t *l2 = (uint64_t *)arg2; + + return (TREE_CMP(*l1, *l2)); +} + +void +vdev_raidz_sit_child(vdev_t *svd, uint64_t secs) +{ + for (int c = 0; c < svd->vdev_children; c++) + vdev_raidz_sit_child(svd->vdev_child[c], secs); + + if (!svd->vdev_ops->vdev_op_leaf) + return; + + /* Begin a sit out period for this slow drive */ + svd->vdev_read_sit_out_expire = gethrestime_sec() + + secs; + + /* Count each slow io period */ + mutex_enter(&svd->vdev_stat_lock); + svd->vdev_stat.vs_slow_ios++; + mutex_exit(&svd->vdev_stat_lock); +} + +void +vdev_raidz_unsit_child(vdev_t *vd) +{ + for (int c = 0; c < vd->vdev_children; c++) + vdev_raidz_unsit_child(vd->vdev_child[c]); + + if (!vd->vdev_ops->vdev_op_leaf) + return; + + vd->vdev_read_sit_out_expire = 0; +} + +/* + * Check for any latency outlier from latest set of child reads. + * + * Uses a Tukey's fence, with K = 50, for detecting extreme outliers. This + * rule defines extreme outliers as data points outside the fence of the + * third quartile plus fifty times the Interquartile Range (IQR). This range + * is the distance between the first and third quartile. + * + * Fifty is an extremely large value for Tukey's fence, but the outliers we're + * attempting to detect here are orders of magnitude times larger than the + * median. This large value should capture any truly fault disk quickly, + * without causing spurious sit-outs. + * + * To further avoid spurious sit-outs, vdevs must be detected multiple times + * as an outlier before they are sat, and outlier counts will gradually decay. + * Every nchildren times we have detected an outlier, we subtract 2 from the + * outlier count of all children. If detected outliers are close to uniformly + * distributed, this will result in the outlier count remaining close to 0 + * (in expectation; over long enough time-scales, spurious sit-outs are still + * possible). + */ +static void +vdev_child_slow_outlier(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + if (!vd->vdev_autosit || vdev_read_sit_out_secs == 0 || + vd->vdev_children < LAT_CHILDREN_MIN) + return; + + hrtime_t now = getlrtime(); + uint64_t last = atomic_load_64(&vd->vdev_last_latency_check); + + if ((now - last) < MSEC2NSEC(vdev_raidz_outlier_check_interval_ms)) + return; + + /* Allow a single winner when there are racing callers. */ + if (atomic_cas_64(&vd->vdev_last_latency_check, last, now) != last) + return; + + int children = vd->vdev_children; + uint64_t *lat_data = kmem_alloc(sizeof (uint64_t) * children, KM_SLEEP); + + for (int c = 0; c < children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + if (cvd->vdev_prev_histo == NULL) { + mutex_enter(&cvd->vdev_stat_lock); + size_t size = + sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]); + cvd->vdev_prev_histo = kmem_zalloc(size, KM_SLEEP); + memcpy(cvd->vdev_prev_histo, + cvd->vdev_stat_ex.vsx_disk_histo[ZIO_TYPE_READ], + size); + mutex_exit(&cvd->vdev_stat_lock); + } + } + uint64_t max = 0; + vdev_t *svd = NULL; + uint_t sitouts = 0; + boolean_t skip = B_FALSE, svd_sitting = B_FALSE; + for (int c = 0; c < children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + boolean_t sitting = vdev_sit_out_reads(cvd, 0) || + cvd->vdev_state != VDEV_STATE_HEALTHY; + + /* We can't sit out more disks than we have parity */ + if (sitting && ++sitouts >= vdev_get_nparity(vd)) + skip = B_TRUE; + + mutex_enter(&cvd->vdev_stat_lock); + + uint64_t *prev_histo = cvd->vdev_prev_histo; + uint64_t *histo = + cvd->vdev_stat_ex.vsx_disk_histo[ZIO_TYPE_READ]; + if (skip) { + size_t size = + sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]); + memcpy(prev_histo, histo, size); + mutex_exit(&cvd->vdev_stat_lock); + continue; + } + uint64_t count = 0; + lat_data[c] = 0; + for (int i = 0; i < VDEV_L_HISTO_BUCKETS; i++) { + uint64_t this_count = histo[i] - prev_histo[i]; + lat_data[c] += (1ULL << i) * this_count; + count += this_count; + } + size_t size = sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]); + memcpy(prev_histo, histo, size); + mutex_exit(&cvd->vdev_stat_lock); + lat_data[c] /= MAX(1, count); + + /* Wait until all disks have been read from */ + if (lat_data[c] == 0 && !sitting) { + skip = B_TRUE; + continue; + } + + /* Keep track of the vdev with largest value */ + if (lat_data[c] > max) { + max = lat_data[c]; + svd = cvd; + svd_sitting = sitting; + } + } + + if (skip) { + kmem_free(lat_data, sizeof (uint64_t) * children); + return; + } + + qsort((void *)lat_data, children, sizeof (uint64_t), latency_compare); + + uint64_t iqr; + uint64_t fence = latency_quartiles_fence(lat_data, children, &iqr); + + ASSERT3U(lat_data[children - 1], ==, max); + if (max > fence && !svd_sitting) { + ASSERT3U(iqr, >, 0); + uint64_t incr = MAX(1, MIN((max - fence) / iqr, + LAT_OUTLIER_LIMIT / 4)); + vd->vdev_outlier_count += incr; + if (vd->vdev_outlier_count >= children) { + for (int c = 0; c < children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + cvd->vdev_outlier_count -= 2; + cvd->vdev_outlier_count = MAX(0, + cvd->vdev_outlier_count); + } + vd->vdev_outlier_count = 0; + } + /* + * Keep track of how many times this child has had + * an outlier read. A disk that persitently has a + * higher than peers outlier count will be considered + * a slow disk. + */ + svd->vdev_outlier_count += incr; + if (svd->vdev_outlier_count > LAT_OUTLIER_LIMIT) { + ASSERT0(svd->vdev_read_sit_out_expire); + vdev_raidz_sit_child(svd, vdev_read_sit_out_secs); + (void) zfs_ereport_post(FM_EREPORT_ZFS_SITOUT, + zio->io_spa, svd, NULL, NULL, 0); + vdev_dbgmsg(svd, "begin read sit out for %d secs", + (int)vdev_read_sit_out_secs); + + for (int c = 0; c < vd->vdev_children; c++) + vd->vdev_child[c]->vdev_outlier_count = 0; + } + } + + kmem_free(lat_data, sizeof (uint64_t) * children); +} + static void vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr) { @@ -3515,6 +3850,9 @@ vdev_raidz_io_done(zio_t *zio) raidz_row_t *rr = rm->rm_row[i]; vdev_raidz_io_done_verified(zio, rr); } + /* Periodically check for a read outlier */ + if (zio->io_type == ZIO_TYPE_READ) + vdev_child_slow_outlier(zio); zio_checksum_verified(zio); } else { /* @@ -5155,3 +5493,10 @@ ZFS_MODULE_PARAM(zfs_vdev, raidz_, io_aggregate_rows, ULONG, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scrub_after_expand, INT, ZMOD_RW, "For expanded RAIDZ, automatically start a pool scrub when expansion " "completes"); +ZFS_MODULE_PARAM(zfs_vdev, vdev_, read_sit_out_secs, ULONG, ZMOD_RW, + "Raidz/draid slow disk sit out time period in seconds"); +ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_check_interval_ms, U64, + ZMOD_RW, "Interval to check for slow raidz/draid children"); +ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_insensitivity, UINT, + ZMOD_RW, "How insensitive the slow raidz/draid child check should be"); +/* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c index 0816ea134bf3..4cf9e0dbb405 100644 --- a/sys/contrib/openzfs/module/zfs/zfeature.c +++ b/sys/contrib/openzfs/module/zfs/zfeature.c @@ -308,6 +308,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount, ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature)); uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; + ASSERT(MUTEX_HELD(&spa->spa_feat_stats_lock)); VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid, sizeof (uint64_t), 1, &refcount, tx)); @@ -360,7 +361,9 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) feature->fi_guid, 1, strlen(feature->fi_desc) + 1, feature->fi_desc, tx)); + mutex_enter(&spa->spa_feat_stats_lock); feature_sync(spa, feature, initial_refcount, tx); + mutex_exit(&spa->spa_feat_stats_lock); if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) { uint64_t enabling_txg = dmu_tx_get_txg(tx); @@ -416,6 +419,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action, ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); + mutex_enter(&spa->spa_feat_stats_lock); VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP); switch (action) { @@ -433,6 +437,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action, } feature_sync(spa, feature, refcount, tx); + mutex_exit(&spa->spa_feat_stats_lock); } void diff --git a/sys/contrib/openzfs/module/zfs/zfs_crrd.c b/sys/contrib/openzfs/module/zfs/zfs_crrd.c index f9267ed41d71..30d4c7c36897 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_crrd.c +++ b/sys/contrib/openzfs/module/zfs/zfs_crrd.c @@ -162,9 +162,9 @@ dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg) daydiff = time - rrd_tail(&db->dbr_days); monthdiff = time - rrd_tail(&db->dbr_months); - if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60)) + if (monthdiff >= 0 && monthdiff >= 30 * 24 * 60 * 60) rrd_add(&db->dbr_months, time, txg); - else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60)) + else if (daydiff >= 0 && daydiff >= 24 * 60 * 60) rrd_add(&db->dbr_days, time, txg); else if (minutedif >= 0) rrd_add(&db->dbr_minutes, time, txg); @@ -208,7 +208,8 @@ dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2) if (r2 == NULL) return (r1); - return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2); + return (ABS(tv - (hrtime_t)r1->rrdd_time) < + ABS(tv - (hrtime_t)r2->rrdd_time) ? r1 : r2); } uint64_t diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 121b966b9864..5ca7c2320c4e 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -683,6 +683,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) dsl_dataset_t *ds; const char *cp; int error; + boolean_t rawok = (zc->zc_flags & 0x8); /* * Generate the current snapshot name from the given objsetid, then @@ -705,6 +706,10 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); + if (error != 0 && rawok == B_TRUE) { + error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, + ZFS_DELEG_PERM_SEND_RAW, cr); + } dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); @@ -714,9 +719,17 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) static int zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { + boolean_t rawok = nvlist_exists(innvl, "rawok"); + int error; + (void) innvl; - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_SEND, cr)); + error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND, cr); + if (error != 0 && rawok == B_TRUE) { + error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND_RAW, cr); + } + return (error); } static int @@ -4726,7 +4739,7 @@ zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) error = error ? error : resume_err; } zfs_vfs_rele(zfsvfs); - } else if ((zv = zvol_suspend(fsname)) != NULL) { + } else if (zvol_suspend(fsname, &zv) == 0) { error = dsl_dataset_rollback(fsname, target, zvol_tag(zv), outnvl); zvol_resume(zv); @@ -5448,7 +5461,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin, } error = error ? error : end_err; zfs_vfs_rele(zfsvfs); - } else if ((zv = zvol_suspend(tofs)) != NULL) { + } else if (zvol_suspend(tofs, &zv) == 0) { error = dmu_recv_end(&drc, zvol_tag(zv)); zvol_resume(zv); } else { @@ -7619,7 +7632,7 @@ zfs_ioctl_init(void) zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB, zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, - POOL_CHECK_NONE, B_TRUE, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub)); zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS, diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 4cf8912d4269..aeea58bedfe4 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -4574,8 +4574,29 @@ zio_vdev_io_start(zio_t *zio) ASSERT0(zio->io_child_error[ZIO_CHILD_VDEV]); if (vd == NULL) { - if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) - spa_config_enter(spa, SCL_ZIO, zio, RW_READER); + if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) { + /* + * A deadlock workaround. The ddt_prune_unique_entries() + * -> prune_candidates_sync() code path takes the + * SCL_ZIO reader lock and may request it again here. + * If there is another thread who wants the SCL_ZIO + * writer lock, then scl_write_wanted will be set. + * Thus, the spa_config_enter_priority() is used to + * ignore pending writer requests. + * + * The locking should be revised to remove the need + * for this workaround. If that's not workable then + * it should only be applied to the zios involved in + * the pruning process. This impacts the read/write + * I/O balance while pruning. + */ + if (spa->spa_active_ddt_prune) + spa_config_enter_priority(spa, SCL_ZIO, zio, + RW_READER); + else + spa_config_enter(spa, SCL_ZIO, zio, + RW_READER); + } /* * The mirror_ops handle multiple DVAs in a single BP. @@ -5305,6 +5326,16 @@ zio_ready(zio_t *zio) return (NULL); } + if (zio_injection_enabled) { + hrtime_t target = zio_handle_ready_delay(zio); + if (target != 0 && zio->io_target_timestamp == 0) { + zio->io_stage >>= 1; + zio->io_target_timestamp = target; + zio_delay_interrupt(zio); + return (NULL); + } + } + if (zio->io_ready) { ASSERT(IO_IS_ALLOCATING(zio)); ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c index 981a1be4847c..287577018ed1 100644 --- a/sys/contrib/openzfs/module/zfs/zio_inject.c +++ b/sys/contrib/openzfs/module/zfs/zio_inject.c @@ -827,6 +827,44 @@ zio_handle_export_delay(spa_t *spa, hrtime_t elapsed) zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_EXPORT); } +/* + * For testing, inject a delay before ready state. + */ +hrtime_t +zio_handle_ready_delay(zio_t *zio) +{ + inject_handler_t *handler; + hrtime_t now = gethrtime(); + hrtime_t target = 0; + + /* + * Ignore I/O not associated with any logical data. + */ + if (zio->io_logical == NULL) + return (0); + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + if (zio->io_spa != handler->zi_spa || + handler->zi_record.zi_cmd != ZINJECT_DELAY_READY) + continue; + + /* If this handler matches, inject the delay */ + if (zio_match_iotype(zio, handler->zi_record.zi_iotype) && + zio_match_handler(&zio->io_logical->io_bookmark, + zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, + zio_match_dva(zio), &handler->zi_record, zio->io_error)) { + target = now + (hrtime_t)handler->zi_record.zi_timer; + break; + } + } + + rw_exit(&inject_lock); + return (target); +} + static int zio_calculate_range(const char *pool, zinject_record_t *record) { diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 2fd3e1c37045..faced0db7e9e 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -1145,20 +1145,34 @@ zvol_tag(zvol_state_t *zv) /* * Suspend the zvol for recv and rollback. */ -zvol_state_t * -zvol_suspend(const char *name) +int +zvol_suspend(const char *name, zvol_state_t **zvp) { zvol_state_t *zv; zv = zvol_find_by_name(name, RW_WRITER); if (zv == NULL) - return (NULL); + return (SET_ERROR(ENOENT)); /* block all I/O, release in zvol_resume. */ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock)); + /* + * If it's being removed, unlock and return error. It doesn't make any + * sense to try to suspend a zvol being removed, but being here also + * means that zvol_remove_minors_impl() is about to call zvol_remove() + * and then destroy the zvol_state_t, so returning a pointer to it for + * the caller to mess with would be a disaster anyway. + */ + if (zv->zv_flags & ZVOL_REMOVING) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); + /* NB: Returning EIO here to match zfsvfs_teardown() */ + return (SET_ERROR(EIO)); + } + atomic_inc(&zv->zv_suspend_ref); if (zv->zv_open_count > 0) @@ -1171,7 +1185,8 @@ zvol_suspend(const char *name) mutex_exit(&zv->zv_state_lock); /* zv_suspend_lock is released in zvol_resume() */ - return (zv); + *zvp = zv; + return (0); } int diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c index 3db196953f74..c403c001086a 100644 --- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c +++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c @@ -441,64 +441,6 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level) } #ifndef IN_LIBSA -static size_t -zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len, - int level) -{ - int16_t zstd_level; - if (zstd_enum_to_level(level, &zstd_level)) { - ZSTDSTAT_BUMP(zstd_stat_com_inval); - return (s_len); - } - /* - * A zstd early abort heuristic. - * - * - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently - * 128k), don't try any of this, just go. - * (because experimentally that was a reasonable cutoff for a perf win - * with tiny ratio change) - * - First, we try LZ4 compression, and if it doesn't early abort, we - * jump directly to whatever compression level we intended to try. - * - Second, we try zstd-1 - if that errors out (usually, but not - * exclusively, if it would overflow), we give up early. - * - * If it works, instead we go on and compress anyway. - * - * Why two passes? LZ4 alone gets you a lot of the way, but on highly - * compressible data, it was losing up to 8.5% of the compressed - * savings versus no early abort, and all the zstd-fast levels are - * worse indications on their own than LZ4, and don't improve the LZ4 - * pass noticably if stacked like this. - */ - size_t actual_abort_size = zstd_abort_size; - if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level && - s_len >= actual_abort_size) { - int pass_len = 1; - pass_len = zfs_lz4_compress(s_start, d_start, s_len, d_len, 0); - if (pass_len < d_len) { - ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed); - goto keep_trying; - } - ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected); - - pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len, - ZIO_ZSTD_LEVEL_1); - if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) { - ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected); - return (s_len); - } - ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed); - } else { - ZSTDSTAT_BUMP(zstd_stat_passignored); - if (s_len < actual_abort_size) { - ZSTDSTAT_BUMP(zstd_stat_passignored_size); - } - } -keep_trying: - return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level)); - -} - /* Compress block using zstd */ static size_t zfs_zstd_compress_impl(void *s_start, void *d_start, size_t s_len, size_t d_len, diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in index 1ce668e7b86d..8986e29eb7fb 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in @@ -433,7 +433,7 @@ make install DESTDIR=%{?buildroot} find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \; %if 0%{!?__brp_mangle_shebangs:1} find %{?buildroot}%{_bindir} \ - \( -name arc_summary -or -name arcstat -or -name dbufstat \ + \( -name zarcsummary -or -name zarcstat -or -name dbufstat \ -or -name zilstat \) \ -exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \; find %{?buildroot}%{_datadir} \ @@ -508,8 +508,8 @@ systemctl --system daemon-reload >/dev/null || true %{_bindir}/raidz_test %{_bindir}/zvol_wait # Optional Python 3 scripts -%{_bindir}/arc_summary -%{_bindir}/arcstat +%{_bindir}/zarcsummary +%{_bindir}/zarcstat %{_bindir}/dbufstat %{_bindir}/zilstat # Man pages diff --git a/sys/contrib/openzfs/scripts/mancheck.sh b/sys/contrib/openzfs/scripts/mancheck.sh index 364ad1b76286..33d7d3c7155f 100755 --- a/sys/contrib/openzfs/scripts/mancheck.sh +++ b/sys/contrib/openzfs/scripts/mancheck.sh @@ -11,12 +11,12 @@ # AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT # OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # -# shellcheck disable=SC2086 +# shellcheck disable=SC2068,SC2086 trap 'rm -f "$stdout_file" "$stderr_file" "$result_file"' EXIT if [ "$#" -eq 0 ]; then - echo "Usage: $0 manpage-directory..." + echo "Usage: $0 <manpage-directory|manpage-file>..." exit 1 fi @@ -27,7 +27,16 @@ fi IFS=" " -files="$(find "$@" -type f -name '*[1-9]*' -not -name '.*')" || exit 1 +files="$( + for path in $@ ; do + find -L $path -type f -name '*[1-9]*' -not -name '.*' + done | sort | uniq +)" + +if [ "$files" = "" ] ; then + echo no files to process! 1>&2 + exit 1 +fi add_excl="$(awk ' /^.\\" lint-ok:/ { @@ -48,6 +57,4 @@ grep -vhE -e 'mandoc: outdated mandoc.db' -e 'STYLE: referenced manual not found if [ -s "$result_file" ]; then cat "$result_file" exit 1 -else - echo "no errors found" fi diff --git a/sys/contrib/openzfs/scripts/spdxcheck.pl b/sys/contrib/openzfs/scripts/spdxcheck.pl index cdab5368f19c..4d4e14368beb 100755 --- a/sys/contrib/openzfs/scripts/spdxcheck.pl +++ b/sys/contrib/openzfs/scripts/spdxcheck.pl @@ -83,8 +83,8 @@ my $tagged_patterns = q( man/man?/*.?.in # Unsuffixed programs (or generated of same) - cmd/arcstat.in - cmd/arc_summary + cmd/zarcstat.in + cmd/zarcsummary cmd/dbufstat.in cmd/zilstat.in cmd/zpool/zpool.d/* diff --git a/sys/contrib/openzfs/scripts/zfs-helpers.sh b/sys/contrib/openzfs/scripts/zfs-helpers.sh index b45384a9aa52..2e97d40db1c1 100755 --- a/sys/contrib/openzfs/scripts/zfs-helpers.sh +++ b/sys/contrib/openzfs/scripts/zfs-helpers.sh @@ -122,6 +122,13 @@ install() { src=$1 dst=$2 + # We may have an old symlink pointing to different ZFS workspace. + # Remove the old symlink if it doesn't point to our workspace. + if [ -h "$dst" ] && [ "$(readlink -f """$dst""")" != "$src" ] ; then + echo "Removing old symlink: $dst -> $(readlink """$dst""")" + rm "$dst" + fi + if [ -h "$dst" ]; then echo "Symlink exists: $dst" elif [ -e "$dst" ]; then diff --git a/sys/contrib/openzfs/scripts/zfs-tests.sh b/sys/contrib/openzfs/scripts/zfs-tests.sh index 04f3b6f32cb8..5a0a1a609448 100755 --- a/sys/contrib/openzfs/scripts/zfs-tests.sh +++ b/sys/contrib/openzfs/scripts/zfs-tests.sh @@ -38,6 +38,7 @@ DEBUG="" CLEANUP="yes" CLEANUPALL="no" KMSG="" +TIMEOUT_DEBUG="" LOOPBACK="yes" STACK_TRACER="no" FILESIZE="4G" @@ -364,6 +365,7 @@ OPTIONS: -k Disable cleanup after test failure -K Log test names to /dev/kmsg -f Use files only, disables block device tests + -O Dump debugging info to /dev/kmsg on test timeout -S Enable stack tracer (negative performance impact) -c Only create and populate constrained path -R Automatically rerun failing tests @@ -402,7 +404,7 @@ $0 -x EOF } -while getopts 'hvqxkKfScRmn:d:Ds:r:?t:T:u:I:' OPTION; do +while getopts 'hvqxkKfScRmOn:d:Ds:r:?t:T:u:I:' OPTION; do case $OPTION in h) usage @@ -445,6 +447,9 @@ while getopts 'hvqxkKfScRmn:d:Ds:r:?t:T:u:I:' OPTION; do export NFS=1 . "$nfsfile" ;; + O) + TIMEOUT_DEBUG="yes" + ;; d) FILEDIR="$OPTARG" ;; @@ -773,6 +778,7 @@ msg "${TEST_RUNNER}" \ "${DEBUG:+-D}" \ "${KMEMLEAK:+-m}" \ "${KMSG:+-K}" \ + "${TIMEOUT_DEBUG:+-O}" \ "-c \"${RUNFILES}\"" \ "-T \"${TAGS}\"" \ "-i \"${STF_SUITE}\"" \ @@ -783,6 +789,7 @@ msg "${TEST_RUNNER}" \ ${DEBUG:+-D} \ ${KMEMLEAK:+-m} \ ${KMSG:+-K} \ + ${TIMEOUT_DEBUG:+-O} \ -c "${RUNFILES}" \ -T "${TAGS}" \ -i "${STF_SUITE}" \ diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 2da46458289a..9f531411fbe1 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -168,10 +168,10 @@ tags = ['functional', 'cli_root', 'zinject'] tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', 'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', 'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress', - 'zdb_display_block', 'zdb_encrypted', 'zdb_label_checksum', - 'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id', - 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2', 'zdb_backup', - 'zdb_tunables'] + 'zdb_display_block', 'zdb_encrypted', 'zdb_encrypted_raw', + 'zdb_label_checksum', 'zdb_object_range_neg', 'zdb_object_range_pos', + 'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2', + 'zdb_backup', 'zdb_tunables'] pre = post = tags = ['functional', 'cli_root', 'zdb'] @@ -323,6 +323,10 @@ tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_raw', 'zfs_send_sparse', 'zfs_send-b', 'zfs_send_skip_missing'] tags = ['functional', 'cli_root', 'zfs_send'] +[tests/functional/cli_root/zfs_send_delegation] +tests = ['zfs_send_test'] +tags = ['functional', 'cli_root', 'zfs_send_delegation'] + [tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos', @@ -379,7 +383,7 @@ tags = ['functional', 'cli_root', 'zfs_wait'] [tests/functional/cli_root/zhack] tests = ['zhack_label_repair_001', 'zhack_label_repair_002', - 'zhack_label_repair_003', 'zhack_label_repair_004'] + 'zhack_label_repair_003', 'zhack_label_repair_004', 'zhack_metaslab_leak'] pre = post = tags = ['functional', 'cli_root', 'zhack'] @@ -391,8 +395,9 @@ tags = ['functional', 'cli_root', 'zpool'] [tests/functional/cli_root/zpool_add] tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos', 'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg', - 'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_010_pos', - 'add-o_ashift', 'add_prop_ashift', 'zpool_add_dryrun_output'] + 'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_warn_create', + 'zpool_add_warn_degraded', 'zpool_add_warn_removal', 'add-o_ashift', + 'add_prop_ashift', 'zpool_add_dryrun_output'] tags = ['functional', 'cli_root', 'zpool_add'] [tests/functional/cli_root/zpool_attach] @@ -486,6 +491,10 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', tags = ['functional', 'cli_root', 'zpool_import'] timeout = 1200 +[tests/functional/cli_root/zpool_iostat] +tests = ['zpool_iostat_interval_all', 'zpool_iostat_interval_some'] +tags = ['functional', 'cli_root', 'zpool_iostat'] + [tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported', 'zpool_labelclear_removed', 'zpool_labelclear_valid'] @@ -546,7 +555,7 @@ tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_multiple_pools', 'zpool_error_scrub_001_pos', 'zpool_error_scrub_002_pos', 'zpool_error_scrub_003_pos', 'zpool_error_scrub_004_pos', - 'zpool_scrub_date_range_001'] + 'zpool_scrub_date_range_001', 'zpool_scrub_date_range_002'] tags = ['functional', 'cli_root', 'zpool_scrub'] [tests/functional/cli_root/zpool_set] @@ -624,8 +633,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', - 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', - 'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege', + 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'zarcstat_001_pos', + 'zarcsummary_001_pos', 'zarcsummary_002_neg', 'zpool_wait_privilege', 'zilstat_001_pos'] user = tags = ['functional', 'cli_user', 'misc'] @@ -637,6 +646,10 @@ tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos', user = tags = ['functional', 'cli_user', 'zfs_list'] +[tests/functional/cli_user/zfs_send_delegation_user] +tests = ['zfs_send_usertest'] +tags = ['functional', 'cli_user', 'zfs_send_delegation_user'] + [tests/functional/cli_user/zpool_iostat] tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos', 'zpool_iostat_003_neg', 'zpool_iostat_004_pos', @@ -940,10 +953,11 @@ tags = ['functional', 'rename_dirs'] [tests/functional/replacement] tests = ['attach_import', 'attach_multiple', 'attach_rebuild', - 'attach_resilver', 'detach', 'rebuild_disabled_feature', - 'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild', - 'replace_resilver', 'resilver_restart_001', 'resilver_restart_002', - 'scrub_cancel'] + 'attach_resilver', 'attach_resilver_sit_out', 'detach', + 'rebuild_disabled_feature', 'rebuild_multiple', 'rebuild_raidz', + 'replace_import', 'replace_rebuild', 'replace_resilver', + 'replace_resilver_sit_out', 'resilver_restart_001', + 'resilver_restart_002', 'scrub_cancel'] tags = ['functional', 'replacement'] [tests/functional/reservation] @@ -1076,7 +1090,8 @@ tags = ['functional', 'write_dirs'] [tests/functional/xattr] tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos', 'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg', - 'xattr_011_pos', 'xattr_012_pos', 'xattr_013_pos', 'xattr_compat'] + 'xattr_011_pos', 'xattr_012_pos', 'xattr_013_pos', 'xattr_014_pos', + 'xattr_compat'] tags = ['functional', 'xattr'] [tests/functional/zvol/zvol_ENOSPC] diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run index f3d56acffde0..339361cc2762 100644 --- a/sys/contrib/openzfs/tests/runfiles/linux.run +++ b/sys/contrib/openzfs/tests/runfiles/linux.run @@ -109,7 +109,9 @@ tags = ['functional', 'direct'] [tests/functional/events:Linux] tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill', 'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config', - 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple'] + 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple', + 'zed_synchronous_zedlet', 'slow_vdev_sit_out', 'slow_vdev_sit_out_neg', + 'slow_vdev_degraded_sit_out'] tags = ['functional', 'events'] [tests/functional/fallocate:Linux] @@ -161,7 +163,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', tags = ['functional', 'mmp'] [tests/functional/mount:Linux] -tests = ['umount_unlinked_drain'] +tests = ['umount_unlinked_drain', 'mount_loopback'] tags = ['functional', 'mount'] [tests/functional/pam:Linux] diff --git a/sys/contrib/openzfs/tests/runfiles/sanity.run b/sys/contrib/openzfs/tests/runfiles/sanity.run index 7767c0c2d535..249b415029c4 100644 --- a/sys/contrib/openzfs/tests/runfiles/sanity.run +++ b/sys/contrib/openzfs/tests/runfiles/sanity.run @@ -400,8 +400,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg', 'zpool_history_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', - 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', - 'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege', + 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'zarcstat_001_pos', + 'zarcsummary_001_pos', 'zarcsummary_002_neg', 'zpool_wait_privilege', 'zilstat_001_pos'] user = tags = ['functional', 'cli_user', 'misc'] @@ -622,7 +622,7 @@ tags = ['functional', 'vdev_zaps'] [tests/functional/xattr] tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos', 'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg', - 'xattr_011_pos', 'xattr_013_pos', 'xattr_compat'] + 'xattr_011_pos', 'xattr_013_pos', 'xattr_014_pos', 'xattr_compat'] tags = ['functional', 'xattr'] [tests/functional/zvol/zvol_ENOSPC] diff --git a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in index 2158208be6e5..d2c1185e4a94 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in @@ -34,6 +34,7 @@ from select import select from subprocess import PIPE from subprocess import Popen from subprocess import check_output +from subprocess import run from threading import Timer from time import time, CLOCK_MONOTONIC from os.path import exists @@ -187,6 +188,63 @@ User: %s ''' % (self.pathname, self.identifier, self.outputdir, self.timeout, self.user) def kill_cmd(self, proc, options, kmemleak, keyboard_interrupt=False): + + """ + We're about to kill a command due to a timeout. + If we're running with the -O option, then dump debug info about the + process with the highest CPU usage to /dev/kmsg (Linux only). This can + help debug the timeout. + + Debug info includes: + - 30 lines from 'top' + - /proc/<PID>/stack output of process with highest CPU usage + - Last lines strace-ing process with highest CPU usage + """ + if exists("/dev/kmsg"): + c = """ +TOP_OUT="$(COLUMNS=160 top -b -n 1 | head -n 30)" +read -r PID CMD <<< $(echo "$TOP_OUT" | /usr/bin/awk \ +"/COMMAND/{ + print_next=1 + next +} +{ + if (print_next == 1) { + print \\$1\\" \\"\\$12 + exit + } +}") +echo "##### ZTS timeout debug #####" +echo "----- top -----" +echo "$TOP_OUT" +echo "----- /proc/$PID/stack ($CMD)) -----" +cat /proc/$PID/stack +echo "----- strace ($CMD) -----" +TMPFILE="$(mktemp --suffix=ZTS)" +/usr/bin/strace -k --stack-traces -p $PID &> "$TMPFILE" & +sleep 0.1 +killall strace +tail -n 30 $TMPFILE +rm "$TMPFILE" +echo "##### /proc/sysrq-trigger stack #####" +""" + c = "sudo bash -c '" + c + "'" + data = run(c, capture_output=True, shell=True, text=True) + out = data.stdout + try: + kp = Popen([SUDO, "sh", "-c", + "echo '" + out + "' > /dev/kmsg"]) + kp.wait() + + """ + Trigger kernel stack traces + """ + kp = Popen([SUDO, "sh", "-c", + "echo l > /proc/sysrq-trigger"]) + kp.wait() + except Exception: + pass + """ Kill a running command due to timeout, or ^C from the keyboard. If sudo is required, this user was verified previously. @@ -1129,6 +1187,9 @@ def parse_args(): parser.add_option('-o', action='callback', callback=options_cb, default=BASEDIR, dest='outputdir', type='string', metavar='outputdir', help='Specify an output directory.') + parser.add_option('-O', action='store_true', default=False, + dest='timeout_debug', + help='Dump debugging info to /dev/kmsg on test timeout') parser.add_option('-i', action='callback', callback=options_cb, default=TESTDIR, dest='testdir', type='string', metavar='testdir', help='Specify a test directory.') diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index 001970120148..5bc65f993734 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -232,7 +232,7 @@ maybe = { 'cli_root/zpool_trim/zpool_trim_fault_export_import_online': ['FAIL', known_reason], 'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', 6141], - 'cli_user/misc/arc_summary_001_pos': ['FAIL', known_reason], + 'cli_user/misc/zarcsummary_001_pos': ['FAIL', known_reason], 'delegate/setup': ['SKIP', exec_reason], 'events/zed_cksum_config': ['FAIL', known_reason], 'fault/auto_replace_002_pos': ['FAIL', known_reason], diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c index cbebd33e0bf6..c8d8622c7571 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c @@ -863,7 +863,8 @@ test_result(const crypto_test_t *test, int encrypt_rv, uint8_t *encrypt_buf, return (pass); /* print summary of test result */ - printf("%s[%lu]: encrypt=%s decrypt=%s\n", test->fileloc, test->id, + printf("%s[%ju]: encrypt=%s decrypt=%s\n", test->fileloc, + (uintmax_t)test->id, encrypt_pass ? "PASS" : "FAIL", decrypt_pass ? "PASS" : "FAIL"); diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg index 884a99d785bc..1c4d25e152a7 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg @@ -100,6 +100,7 @@ export SYSTEM_FILES_COMMON='awk uniq vmstat wc + which xargs xxh128sum' @@ -146,6 +147,7 @@ export SYSTEM_FILES_LINUX='attr lscpu lsmod lsscsi + mkfs.xfs mkswap modprobe mountpoint @@ -169,8 +171,8 @@ export ZFS_FILES='zdb zpool ztest raidz_test - arc_summary - arcstat + zarcsummary + zarcstat zilstat dbufstat mount.zfs diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib index 23e89599cae0..8b30b9b91641 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib @@ -1085,7 +1085,7 @@ function fill_fs # destdir dirnum filenum bytes num_writes data typeset -i filenum=${3:-50} typeset -i bytes=${4:-8192} typeset -i num_writes=${5:-10240} - typeset data=${6:-0} + typeset data=${6:-"R"} mkdir -p $destdir/{1..$dirnum} for f in $destdir/{1..$dirnum}/$TESTFILE{1..$filenum}; do @@ -1112,6 +1112,16 @@ function get_pool_prop # property pool zpool get -Hpo value "$prop" "$pool" || log_fail "zpool get $prop $pool" } +# Get the specified vdev property in parsable format or fail +function get_vdev_prop +{ + typeset prop="$1" + typeset pool="$2" + typeset vdev="$3" + + zpool get -Hpo value "$prop" "$pool" "$vdev" || log_fail "zpool get $prop $pool $vdev" +} + # Return 0 if a pool exists; $? otherwise # # $1 - pool name @@ -1971,6 +1981,28 @@ function wait_vdev_state # pool disk state timeout } # +# Wait for vdev 'sit_out' property to be cleared. +# +# $1 pool name +# $2 vdev name +# $3 timeout +# +function wait_sit_out #pool vdev timeout +{ + typeset pool=${1:-$TESTPOOL} + typeset vdev="$2" + typeset timeout=${3:-300} + for (( timer = 0; timer < $timeout; timer++ )); do + if [ "$(get_vdev_prop sit_out "$pool" "$vdev")" = "off" ]; then + return 0 + fi + sleep 1; + done + + return 1 +} + +# # Check the output of 'zpool status -v <pool>', # and to see if the content of <token> contain the <keyword> specified. # @@ -2881,6 +2913,28 @@ function user_run log_note "user: $user" log_note "cmd: $*" + if ! sudo -Eu $user test -x $PATH ; then + log_note "-------------------------------------------------" + log_note "Warning: $user doesn't have permissions on $PATH" + log_note "" + log_note "This usually happens when you're running ZTS locally" + log_note "from inside the ZFS source dir, and are attempting to" + log_note "run a test that calls user_run. The ephemeral user" + log_note "($user) that ZTS is creating does not have permission" + log_note "to traverse to $PATH, or the binaries in $PATH are" + log_note "not the right permissions." + log_note "" + log_note "To get around this, copy your ZFS source directory" + log_note "to a world-accessible location (like /tmp), and " + log_note "change the permissions on your ZFS source dir " + log_note "to allow access." + log_note "" + log_note "Also, verify that /dev/zfs is RW for others:" + log_note "" + log_note " sudo chmod o+rw /dev/zfs" + log_note "-------------------------------------------------" + fi + typeset out=$TEST_BASE_DIR/out typeset err=$TEST_BASE_DIR/err diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg index e273c9f85c28..127ea188f17f 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg @@ -72,6 +72,9 @@ MULTIHOST_INTERVAL multihost.interval zfs_multihost_interval OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_estimate_recordsize PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes +READ_SIT_OUT_SECS vdev.read_sit_out_secs vdev_read_sit_out_secs +SIT_OUT_CHECK_INTERVAL vdev.raidz_outlier_check_interval_ms vdev_raidz_outlier_check_interval_ms +SIT_OUT_INSENSITIVITY vdev.raidz_outlier_insensitivity vdev_raidz_outlier_insensitivity REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment @@ -88,6 +91,7 @@ SPA_DISCARD_MEMORY_LIMIT spa.discard_memory_limit zfs_spa_discard_memory_limit SPA_LOAD_VERIFY_DATA spa.load_verify_data spa_load_verify_data SPA_LOAD_VERIFY_METADATA spa.load_verify_metadata spa_load_verify_metadata SPA_NOTE_TXG_TIME spa.note_txg_time spa_note_txg_time +SPA_FLUSH_TXG_TIME spa.flush_txg_time spa_flush_txg_time TRIM_EXTENT_BYTES_MIN trim.extent_bytes_min zfs_trim_extent_bytes_min TRIM_METASLAB_SKIP trim.metaslab_skip zfs_trim_metaslab_skip TRIM_TXG_BATCH trim.txg_batch zfs_trim_txg_batch diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index 41e7b45ef4ec..678c01b58f94 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -197,6 +197,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/cli_root/zpool_import/blockfiles/unclean_export.dat.bz2 \ functional/cli_root/zpool_import/zpool_import.cfg \ functional/cli_root/zpool_import/zpool_import.kshlib \ + functional/cli_root/zpool_iostat/zpool_iostat.kshlib \ functional/cli_root/zpool_initialize/zpool_initialize.kshlib \ functional/cli_root/zpool_labelclear/labelclear.cfg \ functional/cli_root/zpool_remove/zpool_remove.cfg \ @@ -640,6 +641,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zdb/zdb_decompress_zstd.ksh \ functional/cli_root/zdb/zdb_display_block.ksh \ functional/cli_root/zdb/zdb_encrypted.ksh \ + functional/cli_root/zdb/zdb_encrypted_raw.ksh \ functional/cli_root/zdb/zdb_label_checksum.ksh \ functional/cli_root/zdb/zdb_object_range_neg.ksh \ functional/cli_root/zdb/zdb_object_range_pos.ksh \ @@ -892,6 +894,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs_send/zfs_send_raw.ksh \ functional/cli_root/zfs_send/zfs_send_skip_missing.ksh \ functional/cli_root/zfs_send/zfs_send_sparse.ksh \ + functional/cli_root/zfs_send_delegation/cleanup.ksh \ + functional/cli_root/zfs_send_delegation/setup.ksh \ + functional/cli_root/zfs_send_delegation/zfs_send_test.ksh \ functional/cli_root/zfs_set/cache_001_pos.ksh \ functional/cli_root/zfs_set/cache_002_neg.ksh \ functional/cli_root/zfs_set/canmount_001_pos.ksh \ @@ -1009,6 +1014,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zhack/zhack_label_repair_002.ksh \ functional/cli_root/zhack/zhack_label_repair_003.ksh \ functional/cli_root/zhack/zhack_label_repair_004.ksh \ + functional/cli_root/zhack/zhack_metaslab_leak.ksh \ functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \ functional/cli_root/zpool_add/add-o_ashift.ksh \ functional/cli_root/zpool_add/add_prop_ashift.ksh \ @@ -1023,7 +1029,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_add/zpool_add_007_neg.ksh \ functional/cli_root/zpool_add/zpool_add_008_neg.ksh \ functional/cli_root/zpool_add/zpool_add_009_neg.ksh \ - functional/cli_root/zpool_add/zpool_add_010_pos.ksh \ + functional/cli_root/zpool_add/zpool_add_warn_create.ksh \ + functional/cli_root/zpool_add/zpool_add_warn_degraded.ksh \ + functional/cli_root/zpool_add/zpool_add_warn_removal.ksh \ functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh \ functional/cli_root/zpool_attach/attach-o_ashift.ksh \ functional/cli_root/zpool_attach/cleanup.ksh \ @@ -1174,6 +1182,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh \ functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh \ functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh \ + functional/cli_root/zpool_iostat/setup.ksh \ + functional/cli_root/zpool_iostat/cleanup.ksh \ + functional/cli_root/zpool_iostat/zpool_iostat_interval_all.ksh \ + functional/cli_root/zpool_iostat/zpool_iostat_interval_some.ksh \ functional/cli_root/zpool_initialize/cleanup.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \ @@ -1247,6 +1259,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_date_range_001.ksh \ + functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \ @@ -1351,9 +1364,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool/zpool_002_pos.ksh \ functional/cli_root/zpool/zpool_003_pos.ksh \ functional/cli_root/zpool/zpool_colors.ksh \ - functional/cli_user/misc/arcstat_001_pos.ksh \ - functional/cli_user/misc/arc_summary_001_pos.ksh \ - functional/cli_user/misc/arc_summary_002_neg.ksh \ + functional/cli_user/misc/zarcstat_001_pos.ksh \ + functional/cli_user/misc/zarcsummary_001_pos.ksh \ + functional/cli_user/misc/zarcsummary_002_neg.ksh \ functional/cli_user/misc/zilstat_001_pos.ksh \ functional/cli_user/misc/cleanup.ksh \ functional/cli_user/misc/setup.ksh \ @@ -1408,6 +1421,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_user/zfs_list/zfs_list_005_neg.ksh \ functional/cli_user/zfs_list/zfs_list_007_pos.ksh \ functional/cli_user/zfs_list/zfs_list_008_neg.ksh \ + functional/cli_user/zfs_send_delegation_user/cleanup.ksh \ + functional/cli_user/zfs_send_delegation_user/setup.ksh \ + functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh \ functional/cli_user/zpool_iostat/cleanup.ksh \ functional/cli_user/zpool_iostat/setup.ksh \ functional/cli_user/zpool_iostat/zpool_iostat_001_neg.ksh \ @@ -1524,6 +1540,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/events/events_001_pos.ksh \ functional/events/events_002_pos.ksh \ functional/events/setup.ksh \ + functional/events/slow_vdev_degraded_sit_out.ksh \ + functional/events/slow_vdev_sit_out.ksh \ + functional/events/slow_vdev_sit_out_neg.ksh \ functional/events/zed_cksum_config.ksh \ functional/events/zed_cksum_reported.ksh \ functional/events/zed_diagnose_multiple.ksh \ @@ -1532,6 +1551,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/events/zed_rc_filter.ksh \ functional/events/zed_slow_io.ksh \ functional/events/zed_slow_io_many_vdevs.ksh \ + functional/events/zed_synchronous_zedlet.ksh \ functional/exec/cleanup.ksh \ functional/exec/exec_001_pos.ksh \ functional/exec/exec_002_neg.ksh \ @@ -1706,6 +1726,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/mmp/setup.ksh \ functional/mount/cleanup.ksh \ functional/mount/setup.ksh \ + functional/mount/mount_loopback.ksh \ functional/mount/umount_001.ksh \ functional/mount/umountall_001.ksh \ functional/mount/umount_unlinked_drain.ksh \ @@ -1935,6 +1956,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/replacement/attach_multiple.ksh \ functional/replacement/attach_rebuild.ksh \ functional/replacement/attach_resilver.ksh \ + functional/replacement/attach_resilver_sit_out.ksh \ functional/replacement/cleanup.ksh \ functional/replacement/detach.ksh \ functional/replacement/rebuild_disabled_feature.ksh \ @@ -1943,6 +1965,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/replacement/replace_import.ksh \ functional/replacement/replace_rebuild.ksh \ functional/replacement/replace_resilver.ksh \ + functional/replacement/replace_resilver_sit_out.ksh \ functional/replacement/resilver_restart_001.ksh \ functional/replacement/resilver_restart_002.ksh \ functional/replacement/scrub_cancel.ksh \ @@ -2211,6 +2234,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/xattr/xattr_011_pos.ksh \ functional/xattr/xattr_012_pos.ksh \ functional/xattr/xattr_013_pos.ksh \ + functional/xattr/xattr_014_pos.ksh \ functional/xattr/xattr_compat.ksh \ functional/zap_shrink/cleanup.ksh \ functional/zap_shrink/zap_shrink_001_pos.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_encrypted_raw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_encrypted_raw.ksh new file mode 100755 index 000000000000..85d267d5402f --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_encrypted_raw.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023, Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zdb -K ...' should enable reading from a raw-encrypted dataset +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Write some data to a file +# 3. Run zdb -dddd on the file, confirm it can't be read +# 4. Run zdb -K ... -ddddd on the file, confirm it can be read +# + +verify_runnable "both" + +dataset="$TESTPOOL/$TESTFS2" +file="$TESTDIR2/somefile" +keyfile="$TEST_BASE_DIR/keyfile" + +function cleanup +{ + datasetexists "$dataset" && destroy_dataset "$dataset" -f + rm -f "$keyfile" + default_cleanup_noexit +} + +log_onexit cleanup + +log_must default_setup_noexit $DISKS + +log_assert "'zdb -K' should enable reading from a raw-encrypted dataset" + +# The key must be 32 bytes long. +echo -n "$RAWKEY" > "$keyfile" + +log_must zfs create -o mountpoint="$TESTDIR2" \ + -o encryption=on -o keyformat=raw -o keylocation="file://$keyfile" \ + "$dataset" + +echo 'my great encrypted text' > "$file" + +typeset -i obj=$(ls -i "$file" | cut -d' ' -f1) +typeset -i size=$(wc -c < "$file") + +log_note "test file $file is objid $obj, size $size" + +sync_pool "$TESTPOOL" true + +log_must eval "zdb -dddd $dataset $obj | grep -q 'object encrypted'" + +log_must eval "zdb -K $keyfile -dddd $dataset $obj | grep -q 'size\s$size$'" + +log_pass "'zdb -K' enables reading from a raw-encrypted dataset" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh index 46965aa7cc37..b89790d5d525 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh @@ -63,7 +63,7 @@ log_mustnot_expect 'no such tunable: 0' zdb -o show=0 log_mustnot_expect 'no such tunable: 1' zdb -o info=1 # can set multiple in same command -log_must eval 'zdb -o zfs_recover=1 -o zfs_flags=512 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_flags: 4294965758 -> 512$"' +log_must eval 'zdb -o zfs_recover=1 -o zfs_flags=512 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_flags: 4294932990 -> 512$"' # can set and show in same command log_must eval 'zdb -o zfs_recover=1 -o zfs_recover -o zfs_recover=0 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_recover: 1 zfs_recover: 1 -> 0$"' diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh new file mode 100755 index 000000000000..4a59e15cc693 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh @@ -0,0 +1,43 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib + + +poolexists $TESTPOOL1 && \ + destroy_pool $TESTPOOL1 + +del_user $STAFF1 +del_user $STAFF2 +del_group $STAFF_GROUP + +del_user $OTHER1 +del_user $OTHER2 +del_group $OTHER_GROUP + +default_cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh new file mode 100755 index 000000000000..0978193eddc4 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib + +# Create staff group and add two user to it +log_must add_group $STAFF_GROUP +if ! id $STAFF1 > /dev/null 2>&1; then + log_must add_user $STAFF_GROUP $STAFF1 +fi +if ! id $STAFF2 > /dev/null 2>&1; then + log_must add_user $STAFF_GROUP $STAFF2 +fi + +# Create other group and add two user to it +log_must add_group $OTHER_GROUP +if ! id $OTHER1 > /dev/null 2>&1; then + log_must add_user $OTHER_GROUP $OTHER1 +fi +if ! id $OTHER2 > /dev/null 2>&1; then + log_must add_user $OTHER_GROUP $OTHER2 +fi +DISK=${DISKS%% *} + +default_raidz_setup $DISKS diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh new file mode 100755 index 000000000000..d018f313fae1 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh @@ -0,0 +1,111 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara Inc. +# + +# STRATEGY: +# 1. Create a pool (this is done by the test framework) +# 2. Create an encrypted dataset +# 3. Write random data to the encrypted dataset +# 4. Snapshot the dataset +# 5. As root: attempt a send and raw send (both should succeed) +# 6. Create a delegation (zfs allow -u user send testpool/encrypted_dataset) +# 7. As root: attempt a send and raw send (both should succeed) +# 8. Create a delegation (zfs allow -u user send:raw testpool/encrypted_dataset) +# 9. As root: attempt a send and raw send (both should succeed) +# 10. Disable delegation (zfs unallow) +# 11. As root: attempt a send and raw send (both should succeed) +# 12. Clean up (handled by framework) +# +# Tested as a user under ../cli_user/zfs_send_delegation_user/ + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib +. $STF_SUITE/tests/functional/delegate/delegate.cfg + +# create encrypted dataset + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS1" + +# create target dataset for receives +if ! zfs list | grep testfs2 >/dev/null 2>&1; then + dataset_created="TRUE" + log_must zfs create $TESTPOOL/$TESTFS2 +fi + +# create user and group +typeset perms="snapshot,reservation,compression,checksum,userprop,receive" + +log_note "Added permissions to the $OTHER1 user." +log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS1 +log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS2 + +# create random data +log_must fill_fs $TESTPOOL/$TESTFS1/child 1 2047 1024 1 R + +# snapshot +log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1 + + +# check baseline send abilities (should pass) +log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv0_datastream.$$" +log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv0raw_datastream.$$" + + +# create delegation +log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1 + +# attempt send with full allow + +log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv1_datastream.$$" +log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv1raw_datastream.$$" + +# create raw delegation +log_must zfs allow $OTHER1 send:raw $TESTPOOL/$TESTFS1 +log_must zfs unallow $OTHER1 send $TESTPOOL/$TESTFS1 + +# test new send abilities (should pass) +log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv2_datastream.$$" +log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv2raw_datastream.$$" + + +# disable raw delegation +zfs unallow $OTHER1 send:raw $TESTPOOL/$TESTFS1 +zfs allow $OTHER1 send $TESTPOOL/$TESTFS1 + +# verify original send abilities (should pass) +log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv3_datastream.$$" +log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv3raw_datastream.$$" + + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + destroy_dataset $TESTPOOL/$TESTFS1 -r \ + destroy_dataset $TESTPOOL/$TESTFS2 -r + +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib index 0f5f6198daf2..0d07b1fd1952 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib @@ -33,13 +33,16 @@ # Test one: # # 1. Create pool on a loopback device with some test data -# 2. Export the pool. -# 3. Corrupt all label checksums in the pool -# 4. Check that pool cannot be imported -# 5. Verify that it cannot be imported after using zhack label repair -u +# 2. Checksum repair should work with a valid TXG. Repeatedly write and +# sync the pool so there are enough transactions for every uberblock +# to have a TXG +# 3. Export the pool. +# 4. Corrupt all label checksums in the pool +# 5. Check that pool cannot be imported +# 6. Verify that it cannot be imported after using zhack label repair -u # to ensure that the -u option will quit on corrupted checksums. -# 6. Use zhack label repair -c on device -# 7. Check that pool can be imported and that data is intact +# 7. Use zhack label repair -c on device +# 8. Check that pool can be imported and that data is intact # # Test two: # @@ -97,7 +100,7 @@ VIRTUAL_MIRROR_DEVICE= function cleanup_lo { - L_DEVICE="$1" + typeset L_DEVICE="$1" if [[ -e $L_DEVICE ]]; then if is_linux; then @@ -133,9 +136,9 @@ function get_devsize function pick_logop { - L_SHOULD_SUCCEED="$1" + typeset L_SHOULD_SUCCEED="$1" - l_logop="log_mustnot" + typeset l_logop="log_mustnot" if [ "$L_SHOULD_SUCCEED" == true ]; then l_logop="log_must" fi @@ -145,7 +148,9 @@ function pick_logop function check_dataset { - L_SHOULD_SUCCEED="$1" + typeset L_SHOULD_SUCCEED="$1" + + typeset L_LOGOP= L_LOGOP="$(pick_logop "$L_SHOULD_SUCCEED")" "$L_LOGOP" mounted "$TESTPOOL"/"$TESTFS" @@ -170,9 +175,21 @@ function setup_dataset check_dataset true } +function force_transactions +{ + typeset L_TIMES="$1" + typeset i= + for ((i=0; i < L_TIMES; i++)) + do + touch "$TESTDIR"/"test" || return $? + zpool sync -f "$TESTPOOL" || return $? + done + return 0 +} + function get_practical_size { - L_SIZE="$1" + typeset L_SIZE="$1" if [ "$((L_SIZE % LABEL_SIZE))" -ne 0 ]; then echo "$(((L_SIZE / LABEL_SIZE) * LABEL_SIZE))" @@ -183,10 +200,11 @@ function get_practical_size function corrupt_sized_label_checksum { - L_SIZE="$1" - L_LABEL="$2" - L_DEVICE="$3" + typeset L_SIZE="$1" + typeset L_LABEL="$2" + typeset L_DEVICE="$3" + typeset L_PRACTICAL_SIZE= L_PRACTICAL_SIZE="$(get_practical_size "$L_SIZE")" typeset -a L_OFFSETS=("$LABEL_CKSUM_START" \ @@ -201,8 +219,8 @@ function corrupt_sized_label_checksum function corrupt_labels { - L_SIZE="$1" - L_DISK="$2" + typeset L_SIZE="$1" + typeset L_DISK="$2" corrupt_sized_label_checksum "$L_SIZE" 0 "$L_DISK" corrupt_sized_label_checksum "$L_SIZE" 1 "$L_DISK" @@ -212,11 +230,14 @@ function corrupt_labels function try_import_and_repair { - L_REPAIR_SHOULD_SUCCEED="$1" - L_IMPORT_SHOULD_SUCCEED="$2" - L_OP="$3" - L_POOLDISK="$4" + typeset L_REPAIR_SHOULD_SUCCEED="$1" + typeset L_IMPORT_SHOULD_SUCCEED="$2" + typeset L_OP="$3" + typeset L_POOLDISK="$4" + + typeset L_REPAIR_LOGOP= L_REPAIR_LOGOP="$(pick_logop "$L_REPAIR_SHOULD_SUCCEED")" + typeset L_IMPORT_LOGOP= L_IMPORT_LOGOP="$(pick_logop "$L_IMPORT_SHOULD_SUCCEED")" log_mustnot zpool import "$TESTPOOL" -d "$L_POOLDISK" @@ -230,10 +251,10 @@ function try_import_and_repair function prepare_vdev { - L_SIZE="$1" - L_BACKFILE="$2" + typeset L_SIZE="$1" + typeset L_BACKFILE="$2" - l_devname= + typeset l_devname= if truncate -s "$L_SIZE" "$L_BACKFILE"; then if is_linux; then l_devname="$(losetup -f "$L_BACKFILE" --show)" @@ -248,7 +269,7 @@ function prepare_vdev function run_test_one { - L_SIZE="$1" + typeset L_SIZE="$1" VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")" log_must test -e "$VIRTUAL_DEVICE" @@ -257,6 +278,9 @@ function run_test_one setup_dataset + # Force 256 extra transactions to ensure all uberblocks are assigned a TXG + log_must force_transactions 256 + log_must zpool export "$TESTPOOL" corrupt_labels "$L_SIZE" "$VIRTUAL_DISK" @@ -272,7 +296,7 @@ function run_test_one function make_mirrored_pool { - L_SIZE="$1" + typeset L_SIZE="$1" VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")" log_must test -e "$VIRTUAL_DEVICE" @@ -296,7 +320,7 @@ function export_and_cleanup_vdisk function run_test_two { - L_SIZE="$1" + typeset L_SIZE="$1" make_mirrored_pool "$L_SIZE" @@ -317,7 +341,7 @@ function run_test_two function run_test_three { - L_SIZE="$1" + typeset L_SIZE="$1" make_mirrored_pool "$L_SIZE" @@ -342,7 +366,7 @@ function run_test_three function run_test_four { - L_SIZE="$1" + typeset L_SIZE="$1" make_mirrored_pool "$L_SIZE" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh index ce159b555d20..b5b24322f882 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh @@ -18,13 +18,16 @@ # Strategy: # # 1. Create pool on a loopback device with some test data -# 2. Export the pool. -# 3. Corrupt all label checksums in the pool -# 4. Check that pool cannot be imported -# 5. Verify that it cannot be imported after using zhack label repair -u +# 2. Checksum repair should work with a valid TXG. Repeatedly write and +# sync the pool so there are enough transactions for every uberblock +# to have a TXG +# 3. Export the pool. +# 4. Corrupt all label checksums in the pool +# 5. Check that pool cannot be imported +# 6. Verify that it cannot be imported after using zhack label repair -u # to ensure that the -u option will quit on corrupted checksums. -# 6. Use zhack label repair -c on device -# 7. Check that pool can be imported and that data is intact +# 7. Use zhack label repair -c on device +# 8. Check that pool can be imported and that data is intact . "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh new file mode 100755 index 000000000000..0d2a39be6b5a --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh +# SPDX-License-Identifier: CDDL-1.0 + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack metaslab leak functions correctly +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Gather pool capacity stats +# 3. Generate fragmentation data with zdb +# 4. Destroy the pool +# 5. Create a new pool with the same configuration +# 6. Export the pool +# 7. Apply the fragmentation information with zhack metaslab leak +# 8. Import the pool +# 9. Verify that pool capacity stats match + +. "$STF_SUITE"/include/libtest.shlib + +verify_runnable "global" + +function cleanup +{ + zpool destroy $TESTPOOL + rm $tmp +} + +log_onexit cleanup +log_assert "zhack metaslab leak leaks the right amount of space" + +typeset tmp=$(mktemp) + +log_must zpool create $TESTPOOL $DISKS +for i in `seq 1 16`; do + log_must dd if=/dev/urandom of=/$TESTPOOL/f$i bs=1M count=16 + log_must zpool sync $TESTPOOL +done +for i in `seq 2 2 16`; do + log_must rm /$TESTPOOL/f$i +done +for i in `seq 1 16`; do + log_must touch /$TESTPOOL/g$i + log_must zpool sync $TESTPOOL +done + +alloc=$(zpool get -Hpo value alloc $TESTPOOL) +log_must eval "zdb -m --allocated-map $TESTPOOL > $tmp" +log_must zpool destroy $TESTPOOL + +log_must zpool create $TESTPOOL $DISKS +log_must zpool export $TESTPOOL +log_must eval "zhack metaslab leak $TESTPOOL < $tmp" +log_must zpool import $TESTPOOL + +alloc2=$(zpool get -Hpo value alloc $TESTPOOL) + +within_percent $alloc $alloc2 98 || + log_fail "space usage changed too much: $alloc to $alloc2" + +log_pass "zhack metaslab leak behaved correctly" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib index 091d65bb4f33..74780bb02141 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib @@ -27,6 +27,7 @@ # # Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright 2025 by Lawrence Livermore National Security, LLC. # . $STF_SUITE/include/libtest.shlib @@ -89,3 +90,44 @@ function save_dump_dev fi echo $dumpdev } + +function zpool_create_add_setup +{ + typeset -i i=0 + + while ((i < 10)); do + log_must truncate -s $MINVDEVSIZE $TEST_BASE_DIR/vdev$i + + eval vdev$i=$TEST_BASE_DIR/vdev$i + ((i += 1)) + done + + if is_linux; then + vdev_lo="$(losetup -f "$vdev4" --show)" + elif is_freebsd; then + vdev_lo=/dev/"$(mdconfig -a -t vnode -f "$vdev4")" + else + vdev_lo="$(lofiadm -a "$vdev4")" + fi +} + +function zpool_create_add_cleanup +{ + datasetexists $TESTPOOL1 && destroy_pool $TESTPOOL1 + + if [[ -e $vdev_lo ]]; then + if is_linux; then + log_must losetup -d "$vdev_lo" + elif is_freebsd; then + log_must mdconfig -d -u "$vdev_lo" + else + log_must lofiadm -d "$vdev_lo" + fi + fi + + typeset -i i=0 + while ((i < 10)); do + rm -f $TEST_BASE_DIR/vdev$i + ((i += 1)) + done +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_010_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_create.ksh index df085a2ec746..661e55998d8d 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_010_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_create.ksh @@ -23,67 +23,51 @@ # # Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -# -# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright 2012, 2016 by Delphix. All rights reserved. +# Copyright 2025 by Lawrence Livermore National Security, LLC. # . $STF_SUITE/include/libtest.shlib -. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_add/zpool_add.kshlib # # DESCRIPTION: -# Verify zpool add succeed when adding vdevs with matching redundancy. +# Verify zpool add succeeds when adding vdevs with matching redundancy +# and warns with differing redundancy for a healthy pool. # # STRATEGY: # 1. Create several files == $MINVDEVSIZE. # 2. Verify 'zpool add' succeeds with matching redundancy. # 3. Verify 'zpool add' warns with differing redundancy. -# 4. Verify 'zpool add' warns with differing redundancy after removal. # verify_runnable "global" -function cleanup -{ - datasetexists $TESTPOOL1 && destroy_pool $TESTPOOL1 - - typeset -i i=0 - while ((i < 10)); do - rm -f $TEST_BASE_DIR/vdev$i - ((i += 1)) - done -} - +log_assert "Verify 'zpool add' warns for differing redundancy." +log_onexit zpool_create_add_cleanup -log_assert "Verify 'zpool add' succeed with keywords combination." -log_onexit cleanup +zpool_create_add_setup -# 1. Create several files == $MINVDEVSIZE. typeset -i i=0 -while ((i < 10)); do - log_must truncate -s $MINVDEVSIZE $TEST_BASE_DIR/vdev$i - - eval vdev$i=$TEST_BASE_DIR/vdev$i - ((i += 1)) -done +typeset -i j=0 set -A redundancy0_create_args \ "$vdev0" set -A redundancy1_create_args \ "mirror $vdev0 $vdev1" \ - "raidz1 $vdev0 $vdev1" + "raidz1 $vdev0 $vdev1" \ + "draid1:1s $vdev0 $vdev1 $vdev9" set -A redundancy2_create_args \ "mirror $vdev0 $vdev1 $vdev2" \ - "raidz2 $vdev0 $vdev1 $vdev2" + "raidz2 $vdev0 $vdev1 $vdev2" \ + "draid2:1s $vdev0 $vdev1 $vdev2 $vdev9" set -A redundancy3_create_args \ "mirror $vdev0 $vdev1 $vdev2 $vdev3" \ - "raidz3 $vdev0 $vdev1 $vdev2 $vdev3" + "raidz3 $vdev0 $vdev1 $vdev2 $vdev3" \ + "draid3:1s $vdev0 $vdev1 $vdev2 $vdev3 $vdev9" set -A redundancy0_add_args \ "$vdev5" \ @@ -93,21 +77,19 @@ set -A redundancy1_add_args \ "mirror $vdev5 $vdev6" \ "raidz1 $vdev5 $vdev6" \ "raidz1 $vdev5 $vdev6 mirror $vdev7 $vdev8" \ - "mirror $vdev5 $vdev6 raidz1 $vdev7 $vdev8" + "mirror $vdev5 $vdev6 raidz1 $vdev7 $vdev8" \ + "draid1 $vdev5 $vdev6 mirror $vdev7 $vdev8" \ + "mirror $vdev5 $vdev6 draid1 $vdev7 $vdev8" set -A redundancy2_add_args \ "mirror $vdev5 $vdev6 $vdev7" \ - "raidz2 $vdev5 $vdev6 $vdev7" + "raidz2 $vdev5 $vdev6 $vdev7" \ + "draid2 $vdev5 $vdev6 $vdev7" set -A redundancy3_add_args \ "mirror $vdev5 $vdev6 $vdev7 $vdev8" \ - "raidz3 $vdev5 $vdev6 $vdev7 $vdev8" - -set -A log_args "log" "$vdev4" -set -A cache_args "cache" "$vdev4" -set -A spare_args "spare" "$vdev4" - -typeset -i j=0 + "raidz3 $vdev5 $vdev6 $vdev7 $vdev8" \ + "draid3 $vdev5 $vdev6 $vdev7 $vdev8" function zpool_create_add { @@ -148,30 +130,6 @@ function zpool_create_forced_add done } -function zpool_create_rm_add -{ - typeset -n create_args=$1 - typeset -n add_args=$2 - typeset -n rm_args=$3 - - i=0 - while ((i < ${#create_args[@]})); do - j=0 - while ((j < ${#add_args[@]})); do - log_must zpool create $TESTPOOL1 ${create_args[$i]} - log_must zpool add $TESTPOOL1 ${rm_args[0]} ${rm_args[1]} - log_must zpool add $TESTPOOL1 ${add_args[$j]} - log_must zpool remove $TESTPOOL1 ${rm_args[1]} - log_mustnot zpool add $TESTPOOL1 ${rm_args[1]} - log_must zpool add $TESTPOOL1 ${rm_args[0]} ${rm_args[1]} - log_must zpool destroy -f $TESTPOOL1 - - ((j += 1)) - done - ((i += 1)) - done -} - # 2. Verify 'zpool add' succeeds with matching redundancy. zpool_create_add redundancy0_create_args redundancy0_add_args zpool_create_add redundancy1_create_args redundancy1_add_args @@ -195,17 +153,4 @@ zpool_create_forced_add redundancy3_create_args redundancy0_add_args zpool_create_forced_add redundancy3_create_args redundancy1_add_args zpool_create_forced_add redundancy3_create_args redundancy2_add_args -# 4. Verify 'zpool add' warns with differing redundancy after removal. -zpool_create_rm_add redundancy1_create_args redundancy1_add_args log_args -zpool_create_rm_add redundancy2_create_args redundancy2_add_args log_args -zpool_create_rm_add redundancy3_create_args redundancy3_add_args log_args - -zpool_create_rm_add redundancy1_create_args redundancy1_add_args cache_args -zpool_create_rm_add redundancy2_create_args redundancy2_add_args cache_args -zpool_create_rm_add redundancy3_create_args redundancy3_add_args cache_args - -zpool_create_rm_add redundancy1_create_args redundancy1_add_args spare_args -zpool_create_rm_add redundancy2_create_args redundancy2_add_args spare_args -zpool_create_rm_add redundancy3_create_args redundancy3_add_args spare_args - -log_pass "'zpool add' succeed with keywords combination." +log_pass "Verify 'zpool add' warns for differing redundancy." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_degraded.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_degraded.ksh new file mode 100755 index 000000000000..313eb3666f27 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_degraded.ksh @@ -0,0 +1,204 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2012, 2016 by Delphix. All rights reserved. +# Copyright 2025 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_add/zpool_add.kshlib + +# +# DESCRIPTION: +# Verify zpool add succeeds when adding vdevs with matching redundancy +# and warns with differing redundancy for a degraded pool. +# +# STRATEGY: +# 1. Create several files == $MINVDEVSIZE. +# 2. Verify 'zpool add' succeeds with matching redundancy +# 3. Verify 'zpool add' warns with differing redundancy when +# a. Degraded pool with replaced mismatch vdev (file vs disk) +# b. Degraded pool dRAID distributed spare active +# c. Degraded pool hot spare active +# + +verify_runnable "global" + +log_assert "Verify 'zpool add' warns for differing redundancy." +log_onexit zpool_create_add_cleanup + +zpool_create_add_setup + +set -A redundancy1_create_args \ + "mirror $vdev0 $vdev1" \ + "raidz1 $vdev0 $vdev1" \ + "draid1:1s $vdev0 $vdev1 $vdev9" + +set -A redundancy2_create_args \ + "mirror $vdev0 $vdev1 $vdev2" \ + "raidz2 $vdev0 $vdev1 $vdev2" \ + "draid2:1s $vdev0 $vdev1 $vdev2 $vdev9" + +set -A redundancy3_create_args \ + "mirror $vdev0 $vdev1 $vdev2 $vdev3" \ + "raidz3 $vdev0 $vdev1 $vdev2 $vdev3" \ + "draid3:1s $vdev0 $vdev1 $vdev2 $vdev3 $vdev9" + +set -A redundancy1_add_args \ + "mirror $vdev5 $vdev6" \ + "raidz1 $vdev5 $vdev6" \ + "raidz1 $vdev5 $vdev6 mirror $vdev7 $vdev8" \ + "mirror $vdev5 $vdev6 raidz1 $vdev7 $vdev8" \ + "draid1 $vdev5 $vdev6 mirror $vdev7 $vdev8" \ + "mirror $vdev5 $vdev6 draid1 $vdev7 $vdev8" + +set -A redundancy2_add_args \ + "mirror $vdev5 $vdev6 $vdev7" \ + "raidz2 $vdev5 $vdev6 $vdev7" \ + "draid2 $vdev5 $vdev6 $vdev7" + +set -A redundancy3_add_args \ + "mirror $vdev5 $vdev6 $vdev7 $vdev8" \ + "raidz3 $vdev5 $vdev6 $vdev7 $vdev8" \ + "draid3 $vdev5 $vdev6 $vdev7 $vdev8" + +set -A redundancy1_create_draid_args \ + "draid1:1s $vdev0 $vdev1 $vdev2" + +set -A redundancy2_create_draid_args \ + "draid2:1s $vdev0 $vdev1 $vdev2 $vdev3" + +set -A redundancy3_create_draid_args \ + "draid3:1s $vdev0 $vdev1 $vdev2 $vdev3 $vdev9" + +set -A redundancy1_create_spare_args \ + "mirror $vdev0 $vdev1 spare $vdev_lo" \ + "raidz1 $vdev0 $vdev1 spare $vdev_lo" \ + "draid1 $vdev0 $vdev1 spare $vdev_lo" + +set -A redundancy2_create_spare_args \ + "mirror $vdev0 $vdev1 $vdev2 spare $vdev_lo" \ + "raidz2 $vdev0 $vdev1 $vdev2 spare $vdev_lo" \ + "draid2 $vdev0 $vdev1 $vdev2 spare $vdev_lo" + +set -A redundancy3_create_spare_args \ + "mirror $vdev0 $vdev1 $vdev2 $vdev3 spare $vdev_lo" \ + "raidz3 $vdev0 $vdev1 $vdev2 $vdev3 spare $vdev_lo" \ + "draid3 $vdev0 $vdev1 $vdev2 $vdev3 spare $vdev_lo" + +set -A replace_args "$vdev1" "$vdev_lo" +set -A draid1_args "$vdev1" "draid1-0-0" +set -A draid2_args "$vdev1" "draid2-0-0" +set -A draid3_args "$vdev1" "draid3-0-0" + +typeset -i i=0 +typeset -i j=0 + +function zpool_create_degraded_add +{ + typeset -n create_args=$1 + typeset -n add_args=$2 + typeset -n rm_args=$3 + + i=0 + while ((i < ${#create_args[@]})); do + j=0 + while ((j < ${#add_args[@]})); do + log_must zpool create $TESTPOOL1 ${create_args[$i]} + log_must zpool offline -f $TESTPOOL1 ${rm_args[0]} + log_must zpool replace -w $TESTPOOL1 ${rm_args[0]} ${rm_args[1]} + log_must zpool add $TESTPOOL1 ${add_args[$j]} + log_must zpool destroy -f $TESTPOOL1 + log_must zpool labelclear -f ${rm_args[0]} + + ((j += 1)) + done + ((i += 1)) + done +} + +function zpool_create_forced_degraded_add +{ + typeset -n create_args=$1 + typeset -n add_args=$2 + typeset -n rm_args=$3 + + i=0 + while ((i < ${#create_args[@]})); do + j=0 + while ((j < ${#add_args[@]})); do + log_must zpool create $TESTPOOL1 ${create_args[$i]} + log_must zpool offline -f $TESTPOOL1 ${rm_args[0]} + log_must zpool replace -w $TESTPOOL1 ${rm_args[0]} ${rm_args[1]} + log_mustnot zpool add $TESTPOOL1 ${add_args[$j]} + log_must zpool add --allow-replication-mismatch $TESTPOOL1 ${add_args[$j]} + log_must zpool destroy -f $TESTPOOL1 + log_must zpool labelclear -f ${rm_args[0]} + + ((j += 1)) + done + ((i += 1)) + done +} + +# 2. Verify 'zpool add' succeeds with matching redundancy and a degraded pool. +zpool_create_degraded_add redundancy1_create_args redundancy1_add_args replace_args +zpool_create_degraded_add redundancy2_create_args redundancy2_add_args replace_args +zpool_create_degraded_add redundancy3_create_args redundancy3_add_args replace_args + +# 3. Verify 'zpool add' warns with differing redundancy and a degraded pool. +# +# a. Degraded pool with replaced mismatch vdev (file vs disk) +zpool_create_forced_degraded_add redundancy1_create_args redundancy2_add_args replace_args +zpool_create_forced_degraded_add redundancy1_create_args redundancy3_add_args replace_args + +zpool_create_forced_degraded_add redundancy2_create_args redundancy1_add_args replace_args +zpool_create_forced_degraded_add redundancy2_create_args redundancy3_add_args replace_args + +zpool_create_forced_degraded_add redundancy3_create_args redundancy1_add_args replace_args +zpool_create_forced_degraded_add redundancy3_create_args redundancy2_add_args replace_args + +# b. Degraded pool dRAID distributed spare active + +zpool_create_forced_degraded_add redundancy1_create_draid_args redundancy2_add_args draid1_args +zpool_create_forced_degraded_add redundancy1_create_draid_args redundancy3_add_args draid1_args + +zpool_create_forced_degraded_add redundancy2_create_draid_args redundancy1_add_args draid2_args +zpool_create_forced_degraded_add redundancy2_create_draid_args redundancy3_add_args draid2_args + +zpool_create_forced_degraded_add redundancy3_create_draid_args redundancy1_add_args draid3_args +zpool_create_forced_degraded_add redundancy3_create_draid_args redundancy2_add_args draid3_args + +# c. Degraded pool hot spare active +zpool_create_forced_degraded_add redundancy1_create_spare_args redundancy2_add_args replace_args +zpool_create_forced_degraded_add redundancy1_create_spare_args redundancy3_add_args replace_args + +zpool_create_forced_degraded_add redundancy2_create_spare_args redundancy1_add_args replace_args +zpool_create_forced_degraded_add redundancy2_create_spare_args redundancy3_add_args replace_args + +zpool_create_forced_degraded_add redundancy3_create_spare_args redundancy1_add_args replace_args +zpool_create_forced_degraded_add redundancy3_create_spare_args redundancy2_add_args replace_args + +log_pass "Verify 'zpool add' warns for differing redundancy." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_removal.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_removal.ksh new file mode 100755 index 000000000000..782858e301ac --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_warn_removal.ksh @@ -0,0 +1,126 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2012, 2016 by Delphix. All rights reserved. +# Copyright 2025 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_add/zpool_add.kshlib + +# +# DESCRIPTION: +# Verify zpool add succeeds when adding vdevs with matching redundancy +# and warns with differing redundancy after removal. +# +# STRATEGY: +# 1. Create several files == $MINVDEVSIZE. +# 2. Verify 'zpool add' warns with differing redundancy after removal. +# + +verify_runnable "global" + +log_assert "Verify 'zpool add' warns for differing redundancy." +log_onexit zpool_create_add_cleanup + +zpool_create_add_setup + +typeset -i i=0 +typeset -i j=0 + +set -A redundancy1_create_args \ + "mirror $vdev0 $vdev1" \ + "raidz1 $vdev0 $vdev1" \ + "draid1:1s $vdev0 $vdev1 $vdev9" + +set -A redundancy2_create_args \ + "mirror $vdev0 $vdev1 $vdev2" \ + "raidz2 $vdev0 $vdev1 $vdev2" \ + "draid2:1s $vdev0 $vdev1 $vdev2 $vdev9" + +set -A redundancy3_create_args \ + "mirror $vdev0 $vdev1 $vdev2 $vdev3" \ + "raidz3 $vdev0 $vdev1 $vdev2 $vdev3" \ + "draid3:1s $vdev0 $vdev1 $vdev2 $vdev3 $vdev9" + +set -A redundancy1_add_args \ + "mirror $vdev5 $vdev6" \ + "raidz1 $vdev5 $vdev6" \ + "raidz1 $vdev5 $vdev6 mirror $vdev7 $vdev8" \ + "mirror $vdev5 $vdev6 raidz1 $vdev7 $vdev8" \ + "draid1 $vdev5 $vdev6 mirror $vdev7 $vdev8" \ + "mirror $vdev5 $vdev6 draid1 $vdev7 $vdev8" + +set -A redundancy2_add_args \ + "mirror $vdev5 $vdev6 $vdev7" \ + "raidz2 $vdev5 $vdev6 $vdev7" \ + "draid2 $vdev5 $vdev6 $vdev7" + +set -A redundancy3_add_args \ + "mirror $vdev5 $vdev6 $vdev7 $vdev8" \ + "raidz3 $vdev5 $vdev6 $vdev7 $vdev8" \ + "draid3 $vdev5 $vdev6 $vdev7 $vdev8" + +set -A log_args "log" "$vdev_lo" +set -A cache_args "cache" "$vdev_lo" +set -A spare_args "spare" "$vdev_lo" + + +function zpool_create_rm_add +{ + typeset -n create_args=$1 + typeset -n add_args=$2 + typeset -n rm_args=$3 + + i=0 + while ((i < ${#create_args[@]})); do + j=0 + while ((j < ${#add_args[@]})); do + log_must zpool create $TESTPOOL1 ${create_args[$i]} + log_must zpool add $TESTPOOL1 ${rm_args[0]} ${rm_args[1]} + log_must zpool add $TESTPOOL1 ${add_args[$j]} + log_must zpool remove $TESTPOOL1 ${rm_args[1]} + log_mustnot zpool add $TESTPOOL1 ${rm_args[1]} + log_must zpool add $TESTPOOL1 ${rm_args[0]} ${rm_args[1]} + log_must zpool destroy -f $TESTPOOL1 + + ((j += 1)) + done + ((i += 1)) + done +} + +# 2. Verify 'zpool add' warns with differing redundancy after removal. +zpool_create_rm_add redundancy1_create_args redundancy1_add_args log_args +zpool_create_rm_add redundancy2_create_args redundancy2_add_args log_args +zpool_create_rm_add redundancy3_create_args redundancy3_add_args log_args + +zpool_create_rm_add redundancy1_create_args redundancy1_add_args cache_args +zpool_create_rm_add redundancy2_create_args redundancy2_add_args cache_args +zpool_create_rm_add redundancy3_create_args redundancy3_add_args cache_args + +zpool_create_rm_add redundancy1_create_args redundancy1_add_args spare_args +zpool_create_rm_add redundancy2_create_args redundancy2_add_args spare_args +zpool_create_rm_add redundancy3_create_args redundancy3_add_args spare_args diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/cleanup.ksh new file mode 100755 index 000000000000..099b5426031d --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# +# +. $STF_SUITE/include/libtest.shlib + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/setup.ksh new file mode 100755 index 000000000000..3529a0ccc015 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# +# +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat.kshlib new file mode 100644 index 000000000000..ea4b0bd2756d --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat.kshlib @@ -0,0 +1,235 @@ +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +# Since we want to make sure that iostat responds correctly as pools appear and +# disappear, we run it in the background and capture its output to a file. +# Once we're done, we parse the output and ensure it matches what we'd expect +# from the operations we performed. +# +# Because iostat is producing output every interval, it may produce the "same" +# output for each step of the change; in fact, we want that to make sure we +# don't miss anything. So, we describe what we expect as a series of "chunks". +# Each chunk is a particular kind of output, which may repeat. Current known +# chunk types are: +# +# NOPOOL: the text "no pools available" +# HEADER: three lines, starting with "capacity", "pool" and "----" respectively. +# (the rough shape of the normal iostat header). +# POOL1: a line starting with "pool1" (stats line for a pool of that name) +# POOL2: a line starting with "pool2" +# POOLBOTH: three lines, starting with "pool1", "pool2" (either order) and +# "-----" respectively. (the pool stat output for multiple pools) +# +# (the parser may produce other chunks in a failed parse to assist with +# debugging, but they should never be part of the "wanted" output See the +# parser commentary below). +# +# To help recognise the start of a new interval output, we run iostat with the +# -T u option, which will output a numeric timestamp before each header or +# second-or-later pool stat after the header. +# +# To keep the test run shorter, we use a subsecond interval, but to make sure +# nothing is missed, we sleep for three intervals after each change. + +typeset _iostat_out=$(mktemp) +typeset _iostat_pid="" + +function cleanup_iostat { + if [[ -n $_iostat_pid ]] ; then + kill -KILL $_iostat_pid || true + fi + rm -f $_iostat_out +} + +function start_iostat { + zpool iostat -T u $@ 0.1 > $_iostat_out 2>&1 & + _iostat_pid=$! +} + +function stop_iostat { + kill -TERM $_iostat_pid + wait $_iostat_pid + _iostat_pid="" +} + +function delay_iostat { + sleep 0.3 +} + +typeset -a _iostat_expect +function expect_iostat { + typeset chunk=$1 + _iostat_expect+=($chunk) +} + +# Parse the output The `state` var is used to track state across +# multiple lines. The `last` var and the `_got_iostat` function are used +# to record the completed chunks, and to collapse repetitions. +typeset -a _iostat_got +typeset _iostat_last="" +typeset _iostat_state="" + +function _got_iostat { + typeset chunk=$1 + if [[ -n $chunk && $_iostat_last != $chunk ]] ; then + _iostat_last=$chunk + _iostat_got+=($chunk) + fi + _iostat_state="" +} + +function verify_iostat { + + cat $_iostat_out | while read line ; do + + # The "no pools available" text has no timestamp or other + # header, and should never appear in the middle of multiline + # chunk, so we can close any in-flight state. + if [[ $line = "no pools available" ]] ; then + _got_iostat $_iostat_state + _got_iostat "NOPOOL" + continue + fi + + # A run of digits alone on the line is a timestamp (the `-T u` + # switch to `iostat`). It closes any in-flight state as a + # complete chunk, and indicates the start of a new chunk. + if [[ -z ${line/#+([0-9])/} ]] ; then + _got_iostat $_iostat_state + _iostat_state="TIMESTAMP" + continue + fi + + # For this test, the first word of each line should be unique, + # so we extract it and use it for simplicity. + typeset first=${line%% *} + + # Header is emitted whenever the pool list changes. It has + # three lines: + # + # capacity operations bandwidth + # pool alloc free read write read write + # ---------- ----- ----- ----- ----- ----- ----- + # + # Each line moves the state; when we get to a run of dashes, we + # commit. Note that we check for one-or-more dashes, because + # the width can vary depending on the length of pool name. + # + if [[ $_iostat_state = "TIMESTAMP" && + $first = "capacity" ]] ; then + _iostat_state="INHEADER1" + continue + fi + if [[ $_iostat_state = "INHEADER1" && + $first = "pool" ]] ; then + _iostat_state="INHEADER2" + continue + fi + if [[ $_iostat_state = "INHEADER2" && + -z ${first/#+(-)/} ]] ; then + # Headers never repeat, so if the last committed chunk + # was a header, we commit this one as EXTRAHEADER so we + # can see it in the error output. + if [[ $_iostat_last = "HEADER" ]] ; then + _got_iostat "EXTRAHEADER" + elif [[ $_iostat_last != "EXTRAHEADER" ]] ; then + _got_iostat "HEADER" + fi + _iostat_state="HEADER" + continue + fi + + # A pool stat line looks like: + # + # pool1 147K 240M 0 0 0 0 + # + # If there are multiple pools, iostat follows them with a + # separator of dashed lines: + # + # pool1 147K 240M 0 0 0 0 + # pool2 147K 240M 0 0 0 0 + # ---------- ----- ----- ----- ----- ----- ----- + # + # Stats rows always start after a timestamp or a header. If the + # header was emitted, we won't see a timestamp here (it goes + # before the header). + # + # Because our test exercises both pools on their own and + # together, we allow pools in either order. In practice they + # are sorted, but that's a side-effect of the implementation + # (see zpool_compare()), so we're not going to rely on it here. + if [[ $first = "pool1" ]] || [[ $first = "pool2" ]] ; then + + # First line, track which one we saw. If it's a + # standalone line, it will be committed by the next + # NOPOOL or TIMESTAMP above (or the `_got_iostat` after + # the loop if this is the last line). + if [[ $_iostat_state == "TIMESTAMP" || + $_iostat_state == "HEADER" ]] ; then + if [[ $first = "pool1" ]] ; then + _iostat_state="POOL1" + elif [[ $first = "pool2" ]] ; then + _iostat_state="POOL2" + fi + continue + fi + + # If this is the second pool, we're in a multi-pool + # block, and need to look for the separator to close it + # out. + if [[ $_iostat_state = "POOL1" && $first = "pool2" ]] || + [[ $_iostat_state = "POOL2" && $first = "pool1" ]] ; + then + _iostat_state="INPOOLBOTH" + continue + fi + fi + + # Separator after the stats block. + if [[ $_iostat_state = "INPOOLBOTH" && + -z ${first/#+(-)/} ]] ; then + _got_iostat "POOLBOTH" + continue + fi + + # Anything else will fall through to here. We commit any + # in-flight state, then "UNKNOWN", all to help with debugging.. + if [[ $_iostat_state != "UNKNOWN" ]] ; then + _got_iostat $_iostat_state + _got_iostat "UNKNOWN" + fi + done + + # Close out any remaining state. + _got_iostat $_iostat_state + + # Compare what we wanted with what we got, and pass/fail the test! + if [[ "${_iostat_expect[*]}" != "${_iostat_got[*]}" ]] ; then + log_note "expected: ${_iostat_expect[*]}" + log_note " got: ${_iostat_got[*]}" + log_fail "zpool iostat did not produce expected output" + fi +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat_interval_all.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat_interval_all.ksh new file mode 100755 index 000000000000..8e040058ec3e --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat_interval_all.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +# `zpool iostat <N>` should keep running and update the pools it displays as +# pools are created/destroyed/imported/export. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_iostat/zpool_iostat.kshlib + +typeset vdev1=$(mktemp) +typeset vdev2=$(mktemp) + +function cleanup { + cleanup_iostat + + poolexists pool1 && destroy_pool pool1 + poolexists pool2 && destroy_pool pool2 + rm -f $vdev1 $vdev2 +} + +log_must mkfile $MINVDEVSIZE $vdev1 $vdev2 + +expect_iostat "NOPOOL" + +start_iostat + +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOL1" +log_must zpool create pool1 $vdev1 +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOLBOTH" +log_must zpool create pool2 $vdev2 +delay_iostat + +expect_iostat "NOPOOL" +log_must zpool export -a +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOL2" +log_must zpool import -d $vdev2 pool2 +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOLBOTH" +log_must zpool import -d $vdev1 pool1 +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOL2" +log_must zpool destroy pool1 +delay_iostat + +expect_iostat "NOPOOL" +log_must zpool destroy pool2 +delay_iostat + +stop_iostat + +verify_iostat + +log_pass "zpool iostat in interval mode follows pool updates" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat_interval_some.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat_interval_some.ksh new file mode 100755 index 000000000000..ab1f258aa1cd --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_iostat/zpool_iostat_interval_some.ksh @@ -0,0 +1,80 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +# `zpool iostat <pools> <N>` should keep running and only show the listed pools. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_iostat/zpool_iostat.kshlib + +typeset vdev1=$(mktemp) +typeset vdev2=$(mktemp) + +function cleanup { + cleanup_iostat + + poolexists pool1 && destroy_pool pool1 + poolexists pool2 && destroy_pool pool2 + rm -f $vdev1 $vdev2 +} + +log_must mkfile $MINVDEVSIZE $vdev1 $vdev2 + +log_must zpool create pool1 $vdev1 +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOL1" +start_iostat pool1 +delay_iostat + +log_must zpool create pool2 $vdev2 +delay_iostat + +expect_iostat "NOPOOL" +log_must zpool export -a +delay_iostat + +log_must zpool import -d $vdev2 pool2 +delay_iostat + +expect_iostat "HEADER" +expect_iostat "POOL1" +log_must zpool import -d $vdev1 pool1 +delay_iostat + +expect_iostat "NOPOOL" +log_must zpool destroy pool1 +delay_iostat + +log_must zpool destroy pool2 +delay_iostat + +stop_iostat + +verify_iostat + +log_pass "zpool iostat in interval mode with pools follows listed pool updates" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh new file mode 100755 index 000000000000..9327df81a5c5 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2025 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg + +# +# DESCRIPTION: +# Verify that the timestamp database updates all the tables as expected. +# +# STRATEGY: +# 1. Decrease the note and flush frequency of the txg database. +# 2. Force the pool to sync several txgs +# 3. Verify that there are entries in each of the "month", "day", and +# "minute" tables. +# + +verify_runnable "global" + +function cleanup +{ + log_must restore_tunable SPA_NOTE_TXG_TIME + log_must restore_tunable SPA_FLUSH_TXG_TIME + rm /$TESTPOOL/f1 +} + +log_onexit cleanup + +log_assert "Verifiy timestamp databases all update as expected." + +log_must save_tunable SPA_NOTE_TXG_TIME +log_must set_tunable64 SPA_NOTE_TXG_TIME 1 +log_must save_tunable SPA_FLUSH_TXG_TIME +log_must set_tunable64 SPA_FLUSH_TXG_TIME 1 + +log_must touch /$TESTPOOL/f1 +log_must zpool sync $TESTPOOL +sleep 1 +log_must touch /$TESTPOOL/f1 +log_must zpool sync $TESTPOOL +sleep 1 +log_must touch /$TESTPOOL/f1 +log_must zpool sync $TESTPOOL + +mos_zap="$(zdb -dddd $TESTPOOL 1)" +minutes_entries=$(echo "$mos_zap" | grep "txg_log_time:minutes" | awk '{print $5}') +days_entries=$(echo "$mos_zap" | grep "txg_log_time:days" | awk '{print $5}') +months_entries=$(echo "$mos_zap" | grep "txg_log_time:months" | awk '{print $5}') + +[[ "$minutes_entries" -ne "0" ]] || log_fail "0 entries in the minutes table" +[[ "$days_entries" -ne "0" ]] || log_fail "0 entries in the days table" +[[ "$months_entries" -ne "0" ]] || log_fail "0 entries in the months table" + +log_pass "Verified all timestamp databases had entries as expected." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arcstat_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcstat_001_pos.ksh index 700bd9a6f529..d63b72f8039d 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arcstat_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcstat_001_pos.ksh @@ -37,7 +37,7 @@ log_assert "arcstat generates output and doesn't return an error code" typeset -i i=0 while [[ $i -lt ${#args[*]} ]]; do - log_must eval "arcstat ${args[i]} > /dev/null" + log_must eval "zarcstat ${args[i]} > /dev/null" ((i = i + 1)) done log_pass "arcstat generates output and doesn't return an error code" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_001_pos.ksh index 0840878fdb0d..b7faac5243c9 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_001_pos.ksh @@ -30,16 +30,16 @@ is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing" -log_assert "arc_summary generates output and doesn't return an error code" +log_assert "zarcsummary generates output and doesn't return an error code" # Without this, the below checks aren't going to work the way we hope... set -o pipefail for arg in "" "-a" "-d" "-p 1" "-g" "-s arc" "-r"; do - log_must eval "arc_summary $arg > /dev/null" + log_must eval "zarcsummary $arg > /dev/null" done -log_must eval "arc_summary | head > /dev/null" -log_must eval "arc_summary | head -1 > /dev/null" +log_must eval "zarcsummary | head > /dev/null" +log_must eval "zarcsummary | head -1 > /dev/null" -log_pass "arc_summary generates output and doesn't return an error code" +log_pass "zarcsummary generates output and doesn't return an error code" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_002_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_002_neg.ksh index ec4abe35409d..227646777ba0 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_002_neg.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_002_neg.ksh @@ -30,10 +30,10 @@ is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing" -log_assert "arc_summary generates an error code with invalid options" +log_assert "zarcsummary generates an error code with invalid options" for arg in "-x" "-5" "-p 7" "--err" "-@"; do - log_mustnot eval "arc_summary $arg > /dev/null" + log_mustnot eval "zarcsummary $arg > /dev/null" done -log_pass "arc_summary generates an error code with invalid options" +log_pass "zarcsummary generates an error code with invalid options" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh new file mode 100755 index 000000000000..4a59e15cc693 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh @@ -0,0 +1,43 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib + + +poolexists $TESTPOOL1 && \ + destroy_pool $TESTPOOL1 + +del_user $STAFF1 +del_user $STAFF2 +del_group $STAFF_GROUP + +del_user $OTHER1 +del_user $OTHER2 +del_group $OTHER_GROUP + +default_cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh new file mode 100755 index 000000000000..0978193eddc4 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib + +# Create staff group and add two user to it +log_must add_group $STAFF_GROUP +if ! id $STAFF1 > /dev/null 2>&1; then + log_must add_user $STAFF_GROUP $STAFF1 +fi +if ! id $STAFF2 > /dev/null 2>&1; then + log_must add_user $STAFF_GROUP $STAFF2 +fi + +# Create other group and add two user to it +log_must add_group $OTHER_GROUP +if ! id $OTHER1 > /dev/null 2>&1; then + log_must add_user $OTHER_GROUP $OTHER1 +fi +if ! id $OTHER2 > /dev/null 2>&1; then + log_must add_user $OTHER_GROUP $OTHER2 +fi +DISK=${DISKS%% *} + +default_raidz_setup $DISKS diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh new file mode 100755 index 000000000000..f62f2b07929c --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh @@ -0,0 +1,145 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara Inc. +# + +# STRATEGY: +# 1. Create a pool (this is done by the test framework) +# 2. Create a user +# 3. Create an encrypted dataset +# 4. Write random data to the encrypted dataset +# 5. Snapshot the dataset +# 6. As root: attempt a send and raw send (both should succeed) +# 7. As user: attempt a send and raw send (both should fail, no permission) +# 8. Create a delegation (zfs allow -u user send testpool/encrypted_dataset) +# 9. As root: attempt a send and raw send (both should succeed) +# 10. As user: attempt a send and raw send (both should succeed) +# 11. Create a delegation (zfs allow -u user sendraw testpool/encrypted_dataset) +# 12. As root: attempt a send and raw send (both should succeed) +# 13. As user: attempt a send and raw send (send should fail, raw send should succeed) +# 14. Disable delegation (zfs unallow) +# 15. As root: attempt a send and raw send (both should succeed) +# 16. As user: attempt a send and raw send (both should fail, no permission) +# 17. Clean up (handled by framework) +# root tests to verify this doesnt affect root user under ../cli_root/zfs_send_delegation/ +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib +. $STF_SUITE/tests/functional/delegate/delegate.cfg + +# create encrypted dataset + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS1" + +# create target dataset for receives +log_must zfs create $TESTPOOL/$TESTFS2 + +# set user perms +# need to run chown for fs permissions for $OTHER1 +typeset perms="snapshot,reservation,compression,checksum,userprop,receive,mount,create" + +log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS1 +log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS2 +log_must chown ${OTHER1}:${OTHER_GROUP} /$TESTPOOL/$TESTFS2 + +# create random data +log_must fill_fs $TESTPOOL/$TESTFS1/child 1 2047 1024 1 R + +# snapshot +log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1 + +# note +# we need to use `sh -c` here becuase the quoting on <<<"$*" in the user_run wrapper is broken once pipes and redirects get involved + +# check baseline send abilities (should fail) +log_mustnot user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv0_user_datastream.$$'" +# verify nothing went through +if [ -s $TESTPOOL/$TESTFS2/zfsrecv0_user_datastream.$$ ] +then + log_fail "A zfs recieve was completed in $TESTPOOL/$TESTFS2/zfsrecv0_user_datastream !" +fi +log_mustnot user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv0raw_user_datastream.$$'" +# verify nothing went through +if [ -s $TESTPOOL/$TESTFS2/zfsrecv0raw_user_datastream.$$ ] +then + log_fail "A zfs recieve was completed in $TESTPOOL/$TESTFS2/zfsrecv0raw_user_datastream !" +fi + +# create delegation +log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1 + +# attempt send with full allow (should pass) +log_must user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv1_user_datastream.$$'" +log_must user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv1raw_user_datastream.$$'" + + +# create raw delegation +log_must zfs allow $OTHER1 send:raw $TESTPOOL/$TESTFS1 +# We have to remove 'send' to confirm 'send raw' only allows what we want +log_must zfs unallow -u $OTHER1 send $TESTPOOL/$TESTFS1 + +# test new sendraw abilities (send should fail, sendraw should pass) +log_mustnot user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv2_user_datastream.$$'" + verify nothing went through +if [ -s $TESTPOOL/$TESTFS2/zfsrecv2_user_datastream.$$ ] +then + log_fail "A zfs recieve was completed in $TESTPOOL/$TESTFS2/zfsrecv2_user_datastream !" +fi +log_must user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv2raw_user_datastream.$$'" + +# disable raw delegation +log_must zfs unallow -u $OTHER1 send:raw $TESTPOOL/$TESTFS1 +log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1 + +# test with raw taken away (should pass) +log_must user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv3_user_datastream.$$'" +log_must user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv3raw_user_datastream.$$'" + +# disable send abilities +log_must zfs unallow -u $OTHER1 send $TESTPOOL/$TESTFS1 + +# verify original send abilities (should fail) +log_mustnot user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv4_user_datastream.$$'" + verify nothing went through +if [ -s $TESTPOOL/$TESTFS2/zfsrecv4_user_datastream.$$ ] +then + log_fail "A zfs recieve was completed in $TESTPOOL/$TESTFS2/zfsrecv4_user_datastream !" +fi +log_mustnot user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv4raw_user_datastream.$$'" + verify nothing went through +if [ -s $TESTPOOL/$TESTFS2/zfsrecv4raw_user_datastream.$$ ] +then + log_fail "A zfs recieve was completed in $TESTPOOL/$TESTFS2/zfsrecv4raw_user_datastream !" +fi + + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + destroy_dataset $TESTPOOL/$TESTFS1 -r \ + destroy_dataset $TESTPOOL/$TESTFS2 -r + +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib index 0a402e71ee68..345239b88680 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib @@ -1234,10 +1234,10 @@ function verify_fs_aedsx typeset oldval set -A modes "on" "off" oldval=$(get_prop $perm $fs) - if [[ $oldval == "on" ]]; then - n=1 - elif [[ $oldval == "off" ]]; then + if [[ $oldval == "off" ]]; then n=0 + else + n=1 fi log_note "$user zfs set $perm=${modes[$n]} $fs" user_run $user zfs set $perm=${modes[$n]} $fs diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh new file mode 100755 index 000000000000..d5feb6936b4b --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh @@ -0,0 +1,106 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2024 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2025 by Klara, Inc. + +# DESCRIPTION: +# Verify that vdevs 'sit out' when they are slow +# +# STRATEGY: +# 1. Create various raidz/draid pools +# 2. Degrade/fault one of the disks. +# 3. Inject delays into one of the disks +# 4. Verify disk is set to 'sit out' for awhile. +# 5. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted. +# + +. $STF_SUITE/include/libtest.shlib + +function cleanup +{ + restore_tunable READ_SIT_OUT_SECS + restore_tunable SIT_OUT_CHECK_INTERVAL + log_must zinject -c all + log_must zpool events -c + destroy_pool $TESTPOOL2 + log_must rm -f $TEST_BASE_DIR/vdev.$$.* +} + +log_assert "Verify sit_out works" + +log_onexit cleanup + +# shorten sit out period for testing +save_tunable READ_SIT_OUT_SECS +set_tunable32 READ_SIT_OUT_SECS 5 + +save_tunable SIT_OUT_CHECK_INTERVAL +set_tunable64 SIT_OUT_CHECK_INTERVAL 20 + +log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9} + +for raidtype in raidz2 raidz3 draid2 draid3 ; do + log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9} + log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0" + log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400 + log_must zpool export $TESTPOOL2 + log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2 + + BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9 + SLOW_VDEV=$TEST_BASE_DIR/vdev.$$.8 + + # Initial state should not be sitting out + log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off" ]] + + # Delay our reads 200ms to trigger sit out + log_must zinject -d $SLOW_VDEV -D200:1 -T read $TESTPOOL2 + type=$((RANDOM % 2)) + [[ "$type" -eq "0" ]] && action="degrade" || action="fault" + log_must zinject -d $BAD_VDEV -A $action -T read $TESTPOOL2 + + # Do some reads and wait for us to sit out + for i in {0..99} ; do + dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null & + dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null + + sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV) + if [[ "$sit_out" == "on" ]] ; then + break + fi + done + + log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "on" + + # Clear fault injection + log_must zinject -c all + + # Wait for us to exit our sit out period + log_must wait_sit_out $TESTPOOL2 $SLOW_VDEV 10 + + log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off" + destroy_pool $TESTPOOL2 + log_must zpool labelclear -f $BAD_VDEV +done + +log_pass "sit_out works correctly" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh new file mode 100755 index 000000000000..37f616cf56ee --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh @@ -0,0 +1,102 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2024 by Lawrence Livermore National Security, LLC. + +# DESCRIPTION: +# Verify that vdevs 'sit out' when they are slow +# +# STRATEGY: +# 1. Create various raidz/draid pools +# 2. Inject delays into one of the disks +# 3. Verify disk is set to 'sit out' for awhile. +# 4. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted. +# + +. $STF_SUITE/include/libtest.shlib + +function cleanup +{ + restore_tunable READ_SIT_OUT_SECS + restore_tunable SIT_OUT_CHECK_INTERVAL + log_must zinject -c all + log_must zpool events -c + destroy_pool $TESTPOOL2 + log_must rm -f $TEST_BASE_DIR/vdev.$$.* +} + +log_assert "Verify sit_out works" + +log_onexit cleanup + +# shorten sit out period for testing +save_tunable READ_SIT_OUT_SECS +set_tunable32 READ_SIT_OUT_SECS 5 + +save_tunable SIT_OUT_CHECK_INTERVAL +set_tunable64 SIT_OUT_CHECK_INTERVAL 20 + +log_must truncate -s200M $TEST_BASE_DIR/vdev.$$.{0..9} + +for raidtype in raidz raidz2 raidz3 draid1 draid2 draid3 ; do + log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9} + log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0" + log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=600 + log_must zpool export $TESTPOOL2 + log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2 + + BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9 + + # Initial state should not be sitting out + log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off" ]] + + # Delay our reads 200ms to trigger sit out + log_must zinject -d $BAD_VDEV -D200:1 -T read $TESTPOOL2 + + # Do some reads and wait for us to sit out + for i in {0..99} ; do + dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null & + dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null & + dd if=/$TESTPOOL2/bigfile skip=$((i + 200)) bs=2M count=1 of=/dev/null + + sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV) + if [[ "$sit_out" == "on" ]] ; then + break + fi + done + + log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "on" + + # Clear fault injection + log_must zinject -c all + + # Wait for us to exit our sit out period + log_must wait_sit_out $TESTPOOL2 $BAD_VDEV 10 + + # Verify sit_out was cleared during wait_sit_out + log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off" + + destroy_pool $TESTPOOL2 +done + +log_pass "sit_out works correctly" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh new file mode 100755 index 000000000000..457105a66453 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh @@ -0,0 +1,116 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2024 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2025 by Klara, Inc. + +# DESCRIPTION: +# Verify that we don't sit out too many vdevs +# +# STRATEGY: +# 1. Create draid2 pool +# 2. Inject delays into three of the disks +# 3. Do reads to trigger sit-outs +# 4. Verify exactly 2 disks sit out +# + +. $STF_SUITE/include/libtest.shlib + +function cleanup +{ + restore_tunable READ_SIT_OUT_SECS + restore_tunable SIT_OUT_CHECK_INTERVAL + log_must zinject -c all + log_must zpool events -c + destroy_pool $TESTPOOL2 + log_must rm -f $TEST_BASE_DIR/vdev.$$.* +} + +log_assert "Verify sit_out works" + +log_onexit cleanup + +save_tunable SIT_OUT_CHECK_INTERVAL +set_tunable64 SIT_OUT_CHECK_INTERVAL 20 + +log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9} + +log_must zpool create $TESTPOOL2 draid2 $TEST_BASE_DIR/vdev.$$.{0..9} +log_must zpool set autosit=on $TESTPOOL2 draid2-0 +log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400 +log_must zpool export $TESTPOOL2 +log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2 + +BAD_VDEV1=$TEST_BASE_DIR/vdev.$$.7 +BAD_VDEV2=$TEST_BASE_DIR/vdev.$$.8 +BAD_VDEV3=$TEST_BASE_DIR/vdev.$$.9 + +# Initial state should not be sitting out +log_must eval [[ "$(get_vdev_prop autosit $TESTPOOL2 draid2-0)" == "on" ]] +log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "off" ]] +log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "off" ]] +log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off" ]] + +# Delay our reads 200ms to trigger sit out +log_must zinject -d $BAD_VDEV1 -D200:1 -T read $TESTPOOL2 + +# Do some reads and wait for us to sit out +for i in {0..99} ; do + dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null & + dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null + + sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1) + if [[ "$sit_out" == "on" ]] ; then + break + fi +done +log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "on" + +log_must zinject -d $BAD_VDEV2 -D200:1 -T read $TESTPOOL2 +# Do some reads and wait for us to sit out +for i in {0..99} ; do + dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null & + dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null + + sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2) + if [[ "$sit_out" == "on" ]] ; then + break + fi +done +log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "on" + +log_must zinject -d $BAD_VDEV3 -D200:1 -T read $TESTPOOL2 +# Do some reads and wait for us to sit out +for i in {0..99} ; do + dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null & + dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null + + sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3) + if [[ "$sit_out" == "on" ]] ; then + break + fi +done +log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off" + + +log_pass "sit_out works correctly" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh new file mode 100755 index 000000000000..6b732ea96d0c --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh @@ -0,0 +1,149 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Verify ZED synchronous zedlets work as expected +# +# STRATEGY: +# 1. Create a scrub_start zedlet that runs quickly +# 2. Create a scrub_start zedlet that runs slowly (takes seconds) +# 3. Create a scrub_finish zedlet that is synchronous and runs slowly +# 4. Create a trim_start zedlet that runs quickly +# 4. Scrub the pool +# 5. Trim the pool +# 6. Verify the synchronous scrub_finish zedlet waited for the scrub_start +# zedlets to finish (including the slow one). If the scrub_finish zedlet +# was not synchronous, it would have completed before the slow scrub_start +# zedlet. +# 7. Verify the trim_start zedlet waited for the slow synchronous scrub_finish +# zedlet to complete. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/events/events_common.kshlib + +verify_runnable "both" + +OUR_ZEDLETS="scrub_start-async.sh scrub_start-slow.sh scrub_finish-sync-slow.sh trim_start-async.sh" + +OUTFILE="$TEST_BASE_DIR/zed_synchronous_zedlet_lines" +TESTPOOL2=testpool2 + +function cleanup +{ + zed_stop + + for i in $OUR_ZEDLETS ; do + log_must rm -f $ZEDLET_DIR/$i + done + destroy_pool $TESTPOOL2 + log_must rm -f $TEST_BASE_DIR/vdev-file-sync-zedlet + log_must rm -f $OUTFILE +} + +log_assert "Verify ZED synchronous zedlets work as expected" + +log_onexit cleanup + +# Make a pool +log_must truncate -s 100M $TEST_BASE_DIR/vdev-file-sync-zedlet +log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/vdev-file-sync-zedlet + +# Do an initial scrub +log_must zpool scrub -w $TESTPOOL2 + +log_must zpool events -c + +mkdir -p $ZEDLET_DIR + +# Create zedlets +cat << EOF > $ZEDLET_DIR/scrub_start-async.sh +#!/bin/ksh -p +echo "\$(date) \$(basename \$0)" >> $OUTFILE +EOF + +cat << EOF > $ZEDLET_DIR/scrub_start-slow.sh +#!/bin/ksh -p +sleep 3 +echo "\$(date) \$(basename \$0)" >> $OUTFILE +EOF + +cat << EOF > $ZEDLET_DIR/scrub_finish-sync-slow.sh +#!/bin/ksh -p +sleep 3 +echo "\$(date) \$(basename \$0)" >> $OUTFILE +EOF + +cat << EOF > $ZEDLET_DIR/trim_start-async.sh +#!/bin/ksh -p +echo "\$(date) \$(basename \$0)" >> $OUTFILE +EOF + +for i in $OUR_ZEDLETS ; do + log_must chmod +x $ZEDLET_DIR/$i +done + +log_must zed_start + +# Do a scrub - it should be instantaneous. +log_must zpool scrub -w $TESTPOOL2 + +# Start off a trim immediately after scrubiung. The trim should be +# instantaneous and generate a trimp_start event. This will happen in parallel +# with the slow 'scrub_finish-sync-slow.sh' zedlet still running. +log_must zpool trim -w $TESTPOOL2 + +# Wait for scrub_finish event to happen for sanity. This is the *event*, not +# the completion of zedlets for the event. +log_must file_wait_event $ZED_DEBUG_LOG 'sysevent\.fs\.zfs\.trim_finish' 10 + +# At a minimum, scrub_start-slow.sh + scrub_finish-sync-slow.sh will take a +# total of 6 seconds to run, so wait 7 sec to be sure. +sleep 7 + +# If our zedlets were run in the right order, with sync correctly honored, you +# will see this ordering in $OUTFILE: +# +# Fri May 16 12:04:23 PDT 2025 scrub_start-async.sh +# Fri May 16 12:04:26 PDT 2025 scrub_start-slow.sh +# Fri May 16 12:04:31 PDT 2025 scrub_finish-sync-slow.sh +# Fri May 16 12:04:31 PDT 2025 trim_start-async.sh +# +# Check for this ordering + +# Get a list of just the script names in the order they were executed +# from OUTFILE +lines="$(echo $(grep -Eo '(scrub|trim)_.+\.sh$' $OUTFILE))" + +# Compare it to the ordering we expect +expected="\ +scrub_start-async.sh \ +scrub_start-slow.sh \ +scrub_finish-sync-slow.sh \ +trim_start-async.sh" +log_must test "$lines" == "$expected" + +log_pass "Verified synchronous zedlets" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh index ef49a5d50f6c..45848bec1f5a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh @@ -86,7 +86,7 @@ log_must zpool set autoreplace=on $TESTPOOL # Add some data to the pool log_must zfs create $TESTPOOL/fs -log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 Z +log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 R log_must zpool export $TESTPOOL # Record the partition UUID for later comparison diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh index a77957f32255..878b4e450340 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh @@ -119,7 +119,7 @@ log_must zpool set autoreplace=on $TESTPOOL # Add some data to the pool log_must zfs create $TESTPOOL/fs -log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 Z +log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 R log_must zpool export $TESTPOOL # Record the partition UUID for later comparison diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh index 1b3310edb98b..45b041503e22 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh @@ -67,7 +67,7 @@ log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1)) # Add some data to the pool log_must zfs create $TESTPOOL/fs MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)" -log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z +log_must fill_fs $MNTPOINT $PARITY 200 32768 100 R sync_pool $TESTPOOL # Replace the last child vdev to form a replacing vdev diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_on_probe_errors.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_on_probe_errors.ksh index 340994bb60c5..b5df1c7e37f8 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_on_probe_errors.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_on_probe_errors.ksh @@ -101,7 +101,7 @@ sync_pool $TESTPOOL log_must zfs create $TESTPOOL/fs MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)" SECONDS=0 -log_must fill_fs $MNTPOINT 1 200 4096 10 Z +log_must fill_fs $MNTPOINT 1 200 4096 10 R log_note "fill_fs took $SECONDS seconds" sync_pool $TESTPOOL diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh new file mode 100755 index 000000000000..86adef7ea032 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh @@ -0,0 +1,111 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2025 by Lawrence Livermore National Security, LLC. + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that we can make an xfs filesystem on a ZFS-backed loopback device. +# +# See: +# https://github.com/openzfs/zfs/pull/17298 +# https://github.com/openzfs/zfs/issues/17277 +# +# STRATEGY: +# 1. Make a pool +# 2. Make a file on the pool or create zvol +# 3. Mount the file/zvol behind a loopback device +# 4. Create & mount an xfs filesystem on the loopback device + +function cleanup +{ + if [ -d $TEST_BASE_DIR/mnt ] ; then + umount $TEST_BASE_DIR/mnt + log_must rmdir $TEST_BASE_DIR/mnt + fi + if [ -n "$DEV" ] ; then + log_must losetup -d $DEV + fi + destroy_pool $TESTPOOL2 + log_must rm -f $TEST_BASE_DIR/file1 +} + +if [ ! -x "$(which mkfs.xfs)" ] ; then + log_unsupported "No mkfs.xfs binary" +fi + +if [ ! -d /lib/modules/$(uname -r)/kernel/fs/xfs ] && \ + ! grep -qE '\sxfs$' /proc/filesystems ; then + log_unsupported "No XFS kernel support" +fi + +log_assert "Make an xfs filesystem on a ZFS-backed loopback device" +log_onexit cleanup + +# fio options +export NUMJOBS=2 +export RUNTIME=3 +export PERF_RANDSEED=1234 +export PERF_COMPPERCENT=66 +export PERF_COMPCHUNK=0 +export BLOCKSIZE=128K +export SYNC_TYPE=0 +export FILE_SIZE=$(( 1024 * 1024 )) + +function do_test +{ + imgfile=$1 + log_note "Running test on $imgfile" + log_must losetup -f $imgfile + DEV=$(losetup --associated $imgfile | grep -Eo '^/dev/loop[0-9]+') + log_must mkfs.xfs $DEV + mkdir $TEST_BASE_DIR/mnt + log_must mount $DEV $TEST_BASE_DIR/mnt + export DIRECTORY=$TEST_BASE_DIR/mnt + + for d in 0 1 ; do + # fio options + export DIRECT=$d + log_must fio $FIO_SCRIPTS/mkfiles.fio + log_must fio $FIO_SCRIPTS/random_reads.fio + done + log_must umount $TEST_BASE_DIR/mnt + log_must rmdir $TEST_BASE_DIR/mnt + log_must losetup -d $DEV + DEV="" +} + +log_must truncate -s 1G $TEST_BASE_DIR/file1 +log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/file1 +log_must truncate -s 512M /$TESTPOOL2/img +do_test /$TESTPOOL2/img +log_must rm /$TESTPOOL2/img +log_must zfs create -V 512M $TESTPOOL2/vol + +blkdev="$ZVOL_DEVDIR/$TESTPOOL2/vol" +block_device_wait $blkdev +do_test $blkdev + +log_pass "Verified xfs filesystem on a ZFS-backed loopback device" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh index 3249bd93d5ce..a1da4a8631e1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh @@ -17,6 +17,7 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/refreserv/refreserv.cfg +. $STF_SUITE/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib # # DESCRIPTION: @@ -24,7 +25,7 @@ # # STRATEGY: # 1. Create a pool with a single raidz vdev -# 2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k] +# 2. For each block size [4k, 8k, 128k] # - create a volume # - fully overwrite it # - verify that referenced is less than or equal to reservation @@ -38,6 +39,7 @@ # 1. This test will use up to 14 disks but can cover the key concepts with # 5 disks. # 2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely. +# Therefore, when creating the pool we specify 4Kn sectors. # verify_runnable "global" @@ -60,29 +62,10 @@ log_onexit cleanup poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" -# Testing tiny block sizes on ashift=12 pools causes so much size inflation -# that small test disks may fill before creating small volumes. However, -# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for -# testing the problems that arise from 4K and 8K blocks on ashift=12 pools. -if is_freebsd; then - bps=$(diskinfo -v ${alldisks[0]} | awk '/sectorsize/ { print $1 }') -elif is_linux; then - bps=$(lsblk -nrdo min-io /dev/${alldisks[0]}) -fi -log_must test "$bps" -eq 512 -o "$bps" -eq 4096 -case "$bps" in -512) - allshifts=(9 10 17) - maxpct=151 - ;; -4096) - allshifts=(12 13 17) - maxpct=110 - ;; -*) - log_fail "bytes/sector: $bps != (512|4096)" - ;; -esac +ashift=12 +allshifts=(12 13 17) +maxpct=110 + log_note "Testing in ashift=${allshifts[0]} mode" # This loop handles all iterations of steps 1 through 4 described in strategy @@ -99,18 +82,21 @@ for parity in 1 2 3; do continue fi - log_must zpool create -O compression=off "$TESTPOOL" "$raid" "${disks[@]}" + log_must zpool create -o ashift=$ashift "$TESTPOOL" "$raid" "${disks[@]}" for bits in "${allshifts[@]}"; do vbs=$((1 << bits)) log_note "Testing $raid-$ndisks volblocksize=$vbs" - vol=$TESTPOOL/$TESTVOL + vol=$TESTPOOL/$TESTVOL-$vbs + zdev=$ZVOL_DEVDIR/$vol log_must zfs create -V ${volsize}m \ -o volblocksize=$vbs "$vol" - block_device_wait "/dev/zvol/$vol" - log_must dd if=/dev/zero of=/dev/zvol/$vol \ - bs=1024k count=$volsize + block_device_wait $zdev + blockdev_exists $zdev + + log_must timeout 120 dd if=/dev/urandom of=$zdev \ + bs=1024k count=$volsize status=progress sync_pool $TESTPOOL ref=$(zfs get -Hpo value referenced "$vol") @@ -126,7 +112,7 @@ for parity in 1 2 3; do log_must test "$deltapct" -le $maxpct log_must_busy zfs destroy "$vol" - block_device_wait + blockdev_missing $zdev done log_must_busy zpool destroy "$TESTPOOL" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh new file mode 100755 index 000000000000..6820aba184b7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh @@ -0,0 +1,189 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2016 by Delphix. All rights reserved. +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/replacement/replacement.cfg + +# +# DESCRIPTION: +# Attaching disks while a disk is sitting out reads should pass +# +# STRATEGY: +# 1. Create raidz pools +# 2. Make one disk slower and trigger a read sit out for that disk +# 3. Start some random I/O +# 4. Attach a disk to the pool. +# 5. Verify the integrity of the file system and the resilvering. + +verify_runnable "global" + +save_tunable READ_SIT_OUT_SECS +set_tunable32 READ_SIT_OUT_SECS 120 +save_tunable SIT_OUT_CHECK_INTERVAL +set_tunable64 SIT_OUT_CHECK_INTERVAL 20 + +function cleanup +{ + restore_tunable READ_SIT_OUT_SECS + restore_tunable SIT_OUT_CHECK_INTERVAL + log_must zinject -c all + log_must zpool events -c + + if [[ -n "$child_pids" ]]; then + for wait_pid in $child_pids; do + kill $wait_pid + done + fi + + if poolexists $TESTPOOL1; then + destroy_pool $TESTPOOL1 + fi + + [[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/* +} + +log_assert "Replacing a disk during I/O with a sit out completes." + +options="" +options_display="default options" + +log_onexit cleanup + +[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE " + +[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE " + +[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT " + +[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED " + +[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET " + +options="$options -r " + +[[ -n "$options" ]] && options_display=$options + +child_pids="" + +function attach_test +{ + typeset vdev=$1 + typeset disk=$2 + + typeset i=0 + while [[ $i -lt $iters ]]; do + log_note "Invoking file_trunc with: $options_display on $TESTFILE.$i" + file_trunc $options $TESTDIR/$TESTFILE.$i & + typeset pid=$! + + sleep 1 + + child_pids="$child_pids $pid" + ((i = i + 1)) + done + + # attach disk with a slow drive still present + SECONDS=0 + log_must zpool attach -w $TESTPOOL1 $vdev $disk + log_note took $SECONDS seconds to attach disk + + for wait_pid in $child_pids + do + kill $wait_pid + done + child_pids="" + + log_must zinject -c all + log_must zpool export $TESTPOOL1 + log_must zpool import -d $TESTDIR $TESTPOOL1 + log_must zfs umount $TESTPOOL1/$TESTFS1 + log_must zdb -cdui $TESTPOOL1/$TESTFS1 + log_must zfs mount $TESTPOOL1/$TESTFS1 + verify_pool $TESTPOOL1 +} + +DEVSIZE="150M" +specials_list="" +i=0 +while [[ $i != 10 ]]; do + truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i + specials_list="$specials_list $TESTDIR/$TESTFILE1.$i" + + ((i = i + 1)) +done + +slow_disk=$TESTDIR/$TESTFILE1.3 +log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE + +# Test file size in MB +count=200 + +for type in "raidz1" "raidz2" "raidz3" ; do + create_pool $TESTPOOL1 $type $specials_list + log_must zpool set autosit=on $TESTPOOL1 "${type}-0" + log_must zfs create -o primarycache=none -o recordsize=512K \ + $TESTPOOL1/$TESTFS1 + log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1 + + log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count + + # Make one disk 100ms slower to trigger a sit out + log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1 + + # Do some reads and wait for sit out on slow disk + SECONDS=0 + typeset -i size=0 + for i in $(seq 1 $count) ; do + dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null + size=$i + + sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk) + if [[ "$sit_out" == "on" ]] ; then + break + fi + done + + log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on" + log_note took $SECONDS seconds to reach sit out reading ${size}M + log_must zpool status -s $TESTPOOL1 + + typeset top=$(zpool status -j | jq -r ".pools.$TESTPOOL1.vdevs[].vdevs[].name") + attach_test $top $TESTDIR/$REPLACEFILE + + log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\"" + + destroy_pool $TESTPOOL1 + log_must rm -rf /$TESTPOOL1 +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh new file mode 100755 index 000000000000..4109dbaf45ac --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh @@ -0,0 +1,199 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2016 by Delphix. All rights reserved. +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/replacement/replacement.cfg + +# +# DESCRIPTION: +# Replacing disks while a disk is sitting out reads should pass +# +# STRATEGY: +# 1. Create raidz and draid pools +# 2. Make one disk slower and trigger a read sit out for that disk +# 3. Start some random I/O +# 4. Replace a disk in the pool with another disk. +# 5. Verify the integrity of the file system and the resilvering. +# + +verify_runnable "global" + +save_tunable READ_SIT_OUT_SECS +set_tunable32 READ_SIT_OUT_SECS 120 +save_tunable SIT_OUT_CHECK_INTERVAL +set_tunable64 SIT_OUT_CHECK_INTERVAL 20 + +function cleanup +{ + restore_tunable READ_SIT_OUT_SECS + restore_tunable SIT_OUT_CHECK_INTERVAL + log_must zinject -c all + log_must zpool events -c + + if [[ -n "$child_pids" ]]; then + for wait_pid in $child_pids + do + kill $wait_pid + done + fi + + if poolexists $TESTPOOL1; then + destroy_pool $TESTPOOL1 + fi + + [[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/* +} + +log_assert "Replacing a disk during I/O with a sit out completes." + +options="" +options_display="default options" + +log_onexit cleanup + +[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE " + +[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE " + +[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT " + +[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED " + +[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET " + +options="$options -r " + +[[ -n "$options" ]] && options_display=$options + +child_pids="" + +function replace_test +{ + typeset -i iters=2 + typeset disk1=$1 + typeset disk2=$2 + typeset repl_type=$3 + + typeset i=0 + while [[ $i -lt $iters ]]; do + log_note "Invoking file_trunc with: $options_display on $TESTFILE.$i" + file_trunc $options $TESTDIR/$TESTFILE.$i & + typeset pid=$! + + sleep 1 + + child_pids="$child_pids $pid" + ((i = i + 1)) + done + + typeset repl_flag="-w" + if [[ "$repl_type" == "seq" ]]; then + repl_flag="-ws" + fi + # replace disk with a slow drive still present + SECONDS=0 + log_must zpool replace $repl_flag $TESTPOOL1 $disk1 $disk2 + log_note took $SECONDS seconds to replace disk + + for wait_pid in $child_pids + do + kill $wait_pid + done + child_pids="" + + log_must zinject -c all + log_must zpool export $TESTPOOL1 + log_must zpool import -d $TESTDIR $TESTPOOL1 + log_must zfs umount $TESTPOOL1/$TESTFS1 + log_must zdb -cdui $TESTPOOL1/$TESTFS1 + log_must zfs mount $TESTPOOL1/$TESTFS1 + verify_pool $TESTPOOL1 +} + +DEVSIZE="150M" +specials_list="" +i=0 +while [[ $i != 10 ]]; do + log_must truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i + specials_list="$specials_list $TESTDIR/$TESTFILE1.$i" + + ((i = i + 1)) +done + +slow_disk=$TESTDIR/$TESTFILE1.3 +log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE + +# Test file size in MB +count=400 + +for type in "raidz2" "raidz3" "draid2"; do + create_pool $TESTPOOL1 $type $specials_list + log_must zpool set autosit=on $TESTPOOL1 "${type}-0" + log_must zfs create -o primarycache=none -o recordsize=512K \ + $TESTPOOL1/$TESTFS1 + log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1 + + log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count + + # Make one disk 100ms slower to trigger a sit out + log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1 + + # Do some reads and wait for sit out on slow disk + SECONDS=0 + typeset -i size=0 + for i in $(seq 1 $count) ; do + dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null + size=$i + + sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk) + if [[ "$sit_out" == "on" ]] ; then + break + fi + done + log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on" + log_note took $SECONDS seconds to reach sit out reading ${size}M + log_must zpool status -s $TESTPOOL1 + + typeset repl_type="replace" + if [[ "$type" == "draid2" && $((RANDOM % 2)) -eq 0 ]]; then + repl_type="seq" + fi + replace_test $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE $repl_type + + log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\"" + + destroy_pool $TESTPOOL1 + log_must rm -rf /$TESTPOOL1 +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/setup.ksh index 26153aafbc02..0e79e9b8b70c 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/setup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/setup.ksh @@ -39,6 +39,6 @@ verify_runnable "global" # create a pool without any features -log_must mkfile 128m $TMPDEV +log_must truncate -s $MINVDEVSIZE $TMPDEV log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_readonly_pool.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_readonly_pool.ksh index d6bd69b7e134..e81d07794689 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_readonly_pool.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_readonly_pool.ksh @@ -35,17 +35,19 @@ verify_runnable "global" -TESTFILE="$TESTDIR/file.bin" - log_assert "User accounting upgrade should not be executed on readonly pool" log_onexit cleanup_upgrade # 1. Create a pool with the feature@userobj_accounting disabled to simulate # a legacy pool from a previous ZFS version. -log_must zpool create -d -m $TESTDIR $TESTPOOL $TMPDEV +log_must zpool create -d $TESTPOOL $TMPDEV +log_must zfs create $TESTPOOL/$TESTFS + +MNTPNT=$(get_prop mountpoint $TESTPOOL/$TESTFS) +TESTFILE="$MNTPNT/file.bin" # 2. Create a file on the "legecy" dataset -log_must touch $TESTDIR/file.bin +log_must touch $TESTFILE # 3. Enable feature@userobj_accounting on the pool and verify it is only # "enabled" and not "active": upgrading starts when the filesystem is mounted @@ -54,12 +56,12 @@ log_must test "enabled" == "$(get_pool_prop 'feature@userobj_accounting' $TESTPO # 4. Export the pool and re-import is readonly, without mounting any filesystem log_must zpool export $TESTPOOL -log_must zpool import -o readonly=on -N -d "$(dirname $TMPDEV)" $TESTPOOL +log_must zpool import -o readonly=on -N -d $TEST_BASE_DIR $TESTPOOL # 5. Try to mount the root dataset manually without the "ro" option, then verify # filesystem status and the pool feature status (not "active") to ensure the # pool "readonly" status is enforced. -log_must mount -t zfs -o zfsutil $TESTPOOL $TESTDIR +log_must zfs mount -R $TESTPOOL log_must stat "$TESTFILE" log_mustnot touch "$TESTFILE" log_must test "enabled" == "$(get_pool_prop 'feature@userobj_accounting' $TESTPOOL)" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/xattr/xattr_014_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/xattr/xattr_014_pos.ksh new file mode 100755 index 000000000000..d4c9a0a41816 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/xattr/xattr_014_pos.ksh @@ -0,0 +1,53 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 by Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/xattr/xattr_common.kshlib + +# +# DESCRIPTION: +# The default xattr should be shown as 'sa', not 'on', for clarity. +# +# STRATEGY: +# 1. Create a filesystem. +# 2. Verify that the xattra is shown as 'sa'. +# 3. Manually set the value to 'dir', 'sa', 'on', and 'off'. +# 4. Verify that it is shown as 'dir', 'sa', 'sa', and 'off. +# + +log_assert "The default and specific xattr values are displayed correctly." + +set -A args "dir" "sa" "on" "off" +set -A display "dir" "sa" "sa" "off" + +log_must eval "[[ 'sa' == '$(zfs get -Hpo value xattr $TESTPOOL)' ]]" + +for i in `seq 0 3`; do + log_must zfs set xattr="${args[$i]}" $TESTPOOL + log_must eval "[[ '${display[$i]}' == '$(zfs get -Hpo value xattr $TESTPOOL)' ]]" +done +log_pass "The default and specific xattr values are displayed correctly." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh index 571a698eb63a..502ebada22dc 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh @@ -50,17 +50,53 @@ fi typeset datafile1="$(mktemp -t zvol_misc_fua1.XXXXXX)" typeset datafile2="$(mktemp -t zvol_misc_fua2.XXXXXX)" +typeset datafile3="$(mktemp -t zvol_misc_fua3_log.XXXXXX)" typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL +typeset DISK1=${DISKS%% *} function cleanup { - rm "$datafile1" "$datafile2" + log_must zpool remove $TESTPOOL $datafile3 + rm "$datafile1" "$datafile2" "$datafile2" +} + +# Prints the total number of sync writes for a vdev +# $1: vdev +function get_sync +{ + zpool iostat -p -H -v -r $TESTPOOL $1 | \ + awk '/[0-9]+$/{s+=$4+$5} END{print s}' } function do_test { # Wait for udev to create symlinks to our zvol block_device_wait $zvolpath + # Write using sync (creates FLUSH calls after writes, but not FUA) + old_vdev_writes=$(get_sync $DISK1) + old_log_writes=$(get_sync $datafile3) + + log_must fio --name=write_iops --size=5M \ + --ioengine=libaio --verify=0 --bs=4K \ + --iodepth=1 --rw=randwrite --group_reporting=1 \ + --filename=$zvolpath --sync=1 + + vdev_writes=$(( $(get_sync $DISK1) - $old_vdev_writes)) + log_writes=$(( $(get_sync $datafile3) - $old_log_writes)) + + # When we're doing sync writes, we should see many more writes go to + # the log vs the first vdev. Experiments show anywhere from a 160-320x + # ratio of writes to the log vs the first vdev (due to some straggler + # writes to the first vdev). + # + # Check that we have a large ratio (100x) of sync writes going to the + # log device + ratio=$(($log_writes / $vdev_writes)) + log_note "Got $log_writes log writes, $vdev_writes vdev writes." + if [ $ratio -lt 100 ] ; then + log_fail "Expected > 100x more log writes than vdev writes. " + fi + # Create a data file log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5 @@ -81,6 +117,8 @@ log_assert "Verify that a ZFS volume can do Force Unit Access (FUA)" log_onexit cleanup log_must zfs set compression=off $TESTPOOL/$TESTVOL +log_must truncate -s 100M $datafile3 +log_must zpool add $TESTPOOL log $datafile3 log_note "Testing without blk-mq" |