diff options
author | Martin Matuska <mm@FreeBSD.org> | 2024-10-20 18:08:55 +0000 |
---|---|---|
committer | Martin Matuska <mm@FreeBSD.org> | 2024-10-20 19:26:56 +0000 |
commit | 87bf66d4a7488c496af110d4d05cc0273d49f82e (patch) | |
tree | b70778fb6c20fb7011a18ae97116ab0f258e0867 | |
parent | 6244b9dc4a03e87246bad1c94067dfe54ff4cdbc (diff) | |
parent | b2f6de7b58f81a4894ba26d87865a15e6115a1c2 (diff) |
zfs: merge openzfs/zfs@b2f6de7b5
Notable upstream pull request merges:
#16598 b4e4cbeb2 Always validate checksums for Direct I/O reads
#16603 cefef28e9 vdev_id: multi-lun disks & slot num zero pad
#16613 ab777f436 Return boolean_t in inline functions of
lib/libspl/include/sys/uio.h
#16616 efeb60b86 FreeBSD: ignore some includes when not building kernel
#16617 ca0141f32 zpool/zfs: restore -V & --version options
#16632 7bf525530 zpool/zfs: allow --json wherever -j is allowed
#16635 7e4be9275 zdb: fix printf format in dump_zap()
#16640 48dfe3974 Fallback to strerror() when strerror_l() isn't available
#16650 38a04f0a7 freebsd: Use compiler.h from FreeBSD's base's linuxkpi
#16664 fba6a9069 zfs_debug: Restore log size limit for userspace
#16665 fba6a9069 Fix compile-time warnings caused by duplicate struct
typedefs
#16667 b2f6de7b5 zdb: show bp in uberblock dump
Obtained from: OpenZFS
OpenZFS commit: b2f6de7b58f81a4894ba26d87865a15e6115a1c2
74 files changed, 1112 insertions, 344 deletions
diff --git a/sys/contrib/openzfs/.github/workflows/codeql.yml b/sys/contrib/openzfs/.github/workflows/codeql.yml index 2656a20fea0d..e975d7dd00b9 100644 --- a/sys/contrib/openzfs/.github/workflows/codeql.yml +++ b/sys/contrib/openzfs/.github/workflows/codeql.yml @@ -11,7 +11,7 @@ concurrency: jobs: analyze: name: Analyze - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: actions: read contents: read diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh index ebd80a2f98c1..f838da34efff 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh @@ -18,19 +18,21 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -q -N "" # we expect RAM shortage cat << EOF | sudo tee /etc/ksmtuned.conf > /dev/null +# /etc/ksmtuned.conf - Configuration file for ksmtuned # https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/7/html/virtualization_tuning_and_optimization_guide/chap-ksm KSM_MONITOR_INTERVAL=60 # Millisecond sleep between ksm scans for 16Gb server. # Smaller servers sleep more, bigger sleep less. -KSM_SLEEP_MSEC=10 -KSM_NPAGES_BOOST=300 -KSM_NPAGES_DECAY=-50 -KSM_NPAGES_MIN=64 -KSM_NPAGES_MAX=2048 - -KSM_THRES_COEF=25 -KSM_THRES_CONST=2048 +KSM_SLEEP_MSEC=30 + +KSM_NPAGES_BOOST=0 +KSM_NPAGES_DECAY=0 +KSM_NPAGES_MIN=1000 +KSM_NPAGES_MAX=25000 + +KSM_THRES_COEF=80 +KSM_THRES_CONST=8192 LOGFILE=/var/log/ksmtuned.log DEBUG=1 diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 923c38c0f937..84e13832d10f 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -14,7 +14,7 @@ OSv=$OS # compressed with .zst extension REPO="https://github.com/mcmilk/openzfs-freebsd-images" -FREEBSD="$REPO/releases/download/v2024-09-16" +FREEBSD="$REPO/releases/download/v2024-10-05" URLzs="" # Ubuntu mirrors @@ -62,33 +62,45 @@ case "$OS" in OSv="fedora39" URL="https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2" ;; - freebsd13r) - OSNAME="FreeBSD 13.4-RELEASE" + freebsd13-3r) + OSNAME="FreeBSD 13.3-RELEASE" OSv="freebsd13.0" - URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst" + URLzs="$FREEBSD/amd64-freebsd-13.3-RELEASE.qcow2.zst" BASH="/usr/local/bin/bash" NIC="rtl8139" ;; - freebsd13) - OSNAME="FreeBSD 13.4-STABLE" + freebsd13-4r) + OSNAME="FreeBSD 13.4-RELEASE" OSv="freebsd13.0" - URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst" + URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst" BASH="/usr/local/bin/bash" NIC="rtl8139" ;; - freebsd14r) + freebsd14-0r) + OSNAME="FreeBSD 14.0-RELEASE" + OSv="freebsd14.0" + URLzs="$FREEBSD/amd64-freebsd-14.0-RELEASE.qcow2.zst" + BASH="/usr/local/bin/bash" + ;; + freebsd14-1r) OSNAME="FreeBSD 14.1-RELEASE" OSv="freebsd14.0" URLzs="$FREEBSD/amd64-freebsd-14.1-RELEASE.qcow2.zst" BASH="/usr/local/bin/bash" ;; - freebsd14) + freebsd13-4s) + OSNAME="FreeBSD 13.4-STABLE" + OSv="freebsd13.0" + URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst" + BASH="/usr/local/bin/bash" + ;; + freebsd14-1s) OSNAME="FreeBSD 14.1-STABLE" OSv="freebsd14.0" URLzs="$FREEBSD/amd64-freebsd-14.1-STABLE.qcow2.zst" BASH="/usr/local/bin/bash" ;; - freebsd15) + freebsd15-0c) OSNAME="FreeBSD 15.0-CURRENT" OSv="freebsd14.0" URLzs="$FREEBSD/amd64-freebsd-15.0-CURRENT.qcow2.zst" diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh index 7acb67a27920..bc40e8894b22 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh @@ -14,17 +14,21 @@ PID=$(pidof /usr/bin/qemu-system-x86_64) tail --pid=$PID -f /dev/null sudo virsh undefine openzfs -# definitions of per operating system +# default values per test vm: +VMs=2 +CPU=2 + +# cpu pinning +CPUSET=("0,1" "2,3") + case "$OS" in freebsd*) - VMs=2 - CPU=3 + # FreeBSD can't be optimized via ksmtuned RAM=6 ;; *) - VMs=2 - CPU=3 - RAM=7 + # Linux can be optimized via ksmtuned + RAM=8 ;; esac @@ -73,6 +77,7 @@ EOF --cpu host-passthrough \ --virt-type=kvm --hvm \ --vcpus=$CPU,sockets=1 \ + --cpuset=${CPUSET[$((i-1))]} \ --memory $((1024*RAM)) \ --memballoon model=virtio \ --graphics none \ diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh index cae287ac1073..737dda01b565 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh @@ -11,12 +11,10 @@ function output() { } function outfile() { - test -s "$1" || return cat "$1" >> "out-$logfile.md" } function outfile_plain() { - test -s "$1" || return output "<pre>" cat "$1" >> "out-$logfile.md" output "</pre>" @@ -45,6 +43,8 @@ if [ ! -f out-1.md ]; then tar xf "$tarfile" test -s env.txt || continue source env.txt + # when uname.txt is there, the other files are also ok + test -s uname.txt || continue output "\n## Functional Tests: $OSNAME\n" outfile_plain uname.txt outfile_plain summary.txt diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index 8922701f9899..f819e9938e31 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -22,8 +22,8 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13", "freebsd13r", "freebsd14", "freebsd14r", "ubuntu20", "ubuntu22", "ubuntu24"]' - QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13", "freebsd14", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]' + QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]' # determine CI type when running on PR ci_type="full" if ${{ github.event_name == 'pull_request' }}; then @@ -46,10 +46,12 @@ jobs: strategy: fail-fast: false matrix: - # all: - # os: [almalinux8, almalinux9, archlinux, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24] - # openzfs: - # os: [almalinux8, almalinux9, centos-stream9, debian11, debian12, fedora39, fedora40, freebsd13, freebsd13r, freebsd14, freebsd14r, ubuntu20, ubuntu22, ubuntu24] + # rhl: almalinux8, almalinux9, centos-stream9, fedora39, fedora40 + # debian: debian11, debian12, ubuntu20, ubuntu22, ubuntu24 + # misc: archlinux, tumbleweed + # FreeBSD Release: freebsd13-3r, freebsd13-4r, freebsd14-0r, freebsd14-1r + # FreeBSD Stable: freebsd13-4s, freebsd14-1s + # FreeBSD Current: freebsd15-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} runs-on: ubuntu-24.04 steps: diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index c2eff4ba0bbd..185cca4a44d4 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,8 +1,8 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.3.0 -Release: rc1 +Version: 2.3.99 +Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index a72b80a93732..2121f70b2b9a 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -4266,6 +4266,10 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer) (void) printf("\ttimestamp = %llu UTC = %s", (u_longlong_t)ub->ub_timestamp, ctime(×tamp)); + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); + (void) printf("\tbp = %s\n", blkbuf); + (void) printf("\tmmp_magic = %016llx\n", (u_longlong_t)ub->ub_mmp_magic); if (MMP_VALID(ub)) { diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 34c693fbcb0f..4116cdb51852 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -2162,6 +2162,7 @@ zfs_do_get(int argc, char **argv) cb.cb_type = ZFS_TYPE_DATASET; struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, {"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT}, {0, 0, 0, 0} }; @@ -3852,6 +3853,7 @@ zfs_do_list(int argc, char **argv) nvlist_t *data = NULL; struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, {"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT}, {0, 0, 0, 0} }; @@ -7436,9 +7438,15 @@ share_mount(int op, int argc, char **argv) uint_t nthr; jsobj = data = item = NULL; + struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ajRlvo:Of" : "al")) - != -1) { + while ((c = getopt_long(argc, argv, + op == OP_MOUNT ? ":ajRlvo:Of" : "al", + op == OP_MOUNT ? long_options : NULL, NULL)) != -1) { switch (c) { case 'a': do_all = 1; @@ -8374,8 +8382,14 @@ zfs_do_channel_program(int argc, char **argv) boolean_t sync_flag = B_TRUE, json_output = B_FALSE; zpool_handle_t *zhp; + struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "nt:m:j")) != -1) { + while ((c = getopt_long(argc, argv, "nt:m:j", long_options, + NULL)) != -1) { switch (c) { case 't': case 'm': { @@ -9083,7 +9097,13 @@ zfs_do_version(int argc, char **argv) int c; nvlist_t *jsobj = NULL, *zfs_ver = NULL; boolean_t json = B_FALSE; - while ((c = getopt(argc, argv, "j")) != -1) { + + struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, + {0, 0, 0, 0} + }; + + while ((c = getopt_long(argc, argv, "j", long_options, NULL)) != -1) { switch (c) { case 'j': json = B_TRUE; @@ -9187,7 +9207,7 @@ main(int argc, char **argv) * Special case '-V|--version' */ if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0)) - return (zfs_do_version(argc, argv)); + return (zfs_version_print() != 0); /* * Special case 'help' diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index aa7da68aa683..ea180f6b705e 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -7340,6 +7340,7 @@ zpool_do_list(int argc, char **argv) current_prop_type = ZFS_TYPE_POOL; struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, {"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT}, {"json-pool-key-guid", no_argument, NULL, ZPOOL_OPTION_POOL_KEY_GUID}, @@ -9224,6 +9225,12 @@ vdev_stats_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, } } + if (cb->cb_print_dio_verify) { + nice_num_str_nvlist(vds, "dio_verify_errors", + vs->vs_dio_verify_errors, cb->cb_literal, + cb->cb_json_as_int, ZFS_NICENUM_1024); + } + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, ¬present) == 0) { nice_num_str_nvlist(vds, ZPOOL_CONFIG_NOT_PRESENT, @@ -10975,6 +10982,7 @@ zpool_do_status(int argc, char **argv) struct option long_options[] = { {"power", no_argument, NULL, ZPOOL_OPTION_POWER}, + {"json", no_argument, NULL, 'j'}, {"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT}, {"json-flat-vdevs", no_argument, NULL, ZPOOL_OPTION_JSON_FLAT_VDEVS}, @@ -12583,6 +12591,7 @@ zpool_do_get(int argc, char **argv) current_prop_type = cb.cb_type; struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, {"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT}, {"json-pool-key-guid", no_argument, NULL, ZPOOL_OPTION_POOL_KEY_GUID}, @@ -13497,7 +13506,12 @@ zpool_do_version(int argc, char **argv) int c; nvlist_t *jsobj = NULL, *zfs_ver = NULL; boolean_t json = B_FALSE; - while ((c = getopt(argc, argv, "j")) != -1) { + + struct option long_options[] = { + {"json", no_argument, NULL, 'j'}, + }; + + while ((c = getopt_long(argc, argv, "j", long_options, NULL)) != -1) { switch (c) { case 'j': json = B_TRUE; @@ -13613,7 +13627,7 @@ main(int argc, char **argv) * Special case '-V|--version' */ if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0)) - return (zpool_do_version(argc, argv)); + return (zfs_version_print() != 0); /* * Special case 'help' diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c index 523f280aae1a..4a7959ebfca5 100644 --- a/sys/contrib/openzfs/cmd/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest.c @@ -6717,6 +6717,17 @@ out: * * Only after a full scrub has been completed is it safe to start injecting * data corruption. See the comment in zfs_fault_inject(). + * + * EBUSY may be returned for the following six cases. It's the callers + * responsibility to handle them accordingly. + * + * Current state Requested + * 1. Normal Scrub Running Normal Scrub or Error Scrub + * 2. Normal Scrub Paused Error Scrub + * 3. Normal Scrub Paused Pause Normal Scrub + * 4. Error Scrub Running Normal Scrub or Error Scrub + * 5. Error Scrub Paused Pause Error Scrub + * 6. Resilvering Anything else */ static int ztest_scrub_impl(spa_t *spa) @@ -8082,8 +8093,14 @@ ztest_raidz_expand_check(spa_t *spa) (void) printf("verifying an interrupted raidz " "expansion using a pool scrub ...\n"); } + /* Will fail here if there is non-recoverable corruption detected */ - VERIFY0(ztest_scrub_impl(spa)); + int error = ztest_scrub_impl(spa); + if (error == EBUSY) + error = 0; + + VERIFY0(error); + if (ztest_opts.zo_verbose >= 1) { (void) printf("raidz expansion scrub check complete\n"); } diff --git a/sys/contrib/openzfs/config/deb.am b/sys/contrib/openzfs/config/deb.am index 4d86a1b70615..9e58e1905b73 100644 --- a/sys/contrib/openzfs/config/deb.am +++ b/sys/contrib/openzfs/config/deb.am @@ -58,9 +58,9 @@ deb-utils: deb-local rpm-utils-initramfs pkg1=$${name}-$${version}.$${arch}.rpm; \ pkg2=libnvpair3-$${version}.$${arch}.rpm; \ pkg3=libuutil3-$${version}.$${arch}.rpm; \ - pkg4=libzfs5-$${version}.$${arch}.rpm; \ - pkg5=libzpool5-$${version}.$${arch}.rpm; \ - pkg6=libzfs5-devel-$${version}.$${arch}.rpm; \ + pkg4=libzfs6-$${version}.$${arch}.rpm; \ + pkg5=libzpool6-$${version}.$${arch}.rpm; \ + pkg6=libzfs6-devel-$${version}.$${arch}.rpm; \ pkg7=$${name}-test-$${version}.$${arch}.rpm; \ pkg8=$${name}-dracut-$${version}.noarch.rpm; \ pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \ @@ -72,7 +72,7 @@ deb-utils: deb-local rpm-utils-initramfs path_prepend=`mktemp -d /tmp/intercept.XXXXXX`; \ echo "#!$(SHELL)" > $${path_prepend}/dh_shlibdeps; \ echo "`which dh_shlibdeps` -- \ - -xlibuutil3linux -xlibnvpair3linux -xlibzfs5linux -xlibzpool5linux" \ + -xlibuutil3linux -xlibnvpair3linux -xlibzfs6linux -xlibzpool6linux" \ >> $${path_prepend}/dh_shlibdeps; \ ## These -x arguments are passed to dpkg-shlibdeps, which exclude the ## Debianized packages from the auto-generated dependencies of the new debs, @@ -93,13 +93,17 @@ debian: cp -r contrib/debian debian; chmod +x debian/rules; native-deb-utils: native-deb-local debian + while [ -f debian/deb-build.lock ]; do sleep 1; done; \ + echo "native-deb-utils" > debian/deb-build.lock; \ cp contrib/debian/control debian/control; \ - $(DPKGBUILD) -b -rfakeroot -us -uc; + $(DPKGBUILD) -b -rfakeroot -us -uc; \ + $(RM) -f debian/deb-build.lock native-deb-kmod: native-deb-local debian + while [ -f debian/deb-build.lock ]; do sleep 1; done; \ + echo "native-deb-kmod" > debian/deb-build.lock; \ sh scripts/make_gitrev.sh; \ - fakeroot debian/rules override_dh_binary-modules; + fakeroot debian/rules override_dh_binary-modules; \ + $(RM) -f debian/deb-build.lock native-deb: native-deb-utils native-deb-kmod - -.NOTPARALLEL: native-deb native-deb-utils native-deb-kmod diff --git a/sys/contrib/openzfs/config/user.m4 b/sys/contrib/openzfs/config/user.m4 index badd920d2b8a..4e31745a2abc 100644 --- a/sys/contrib/openzfs/config/user.m4 +++ b/sys/contrib/openzfs/config/user.m4 @@ -33,7 +33,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [ ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV ZFS_AC_CONFIG_USER_ZFSEXEC - AC_CHECK_FUNCS([execvpe issetugid mlockall strlcat strlcpy gettid]) + AC_CHECK_FUNCS([execvpe issetugid mlockall strerror_l strlcat strlcpy gettid]) AC_SUBST(RM) ]) diff --git a/sys/contrib/openzfs/contrib/debian/Makefile.am b/sys/contrib/openzfs/contrib/debian/Makefile.am index f76b59645ead..99d512312df6 100644 --- a/sys/contrib/openzfs/contrib/debian/Makefile.am +++ b/sys/contrib/openzfs/contrib/debian/Makefile.am @@ -12,14 +12,14 @@ dist_noinst_DATA += %D%/openzfs-libpam-zfs.postinst dist_noinst_DATA += %D%/openzfs-libpam-zfs.prerm dist_noinst_DATA += %D%/openzfs-libuutil3.docs dist_noinst_DATA += %D%/openzfs-libuutil3.install.in -dist_noinst_DATA += %D%/openzfs-libzfs4.docs -dist_noinst_DATA += %D%/openzfs-libzfs4.install.in +dist_noinst_DATA += %D%/openzfs-libzfs6.docs +dist_noinst_DATA += %D%/openzfs-libzfs6.install.in dist_noinst_DATA += %D%/openzfs-libzfsbootenv1.docs dist_noinst_DATA += %D%/openzfs-libzfsbootenv1.install.in dist_noinst_DATA += %D%/openzfs-libzfs-dev.docs dist_noinst_DATA += %D%/openzfs-libzfs-dev.install.in -dist_noinst_DATA += %D%/openzfs-libzpool5.docs -dist_noinst_DATA += %D%/openzfs-libzpool5.install.in +dist_noinst_DATA += %D%/openzfs-libzpool6.docs +dist_noinst_DATA += %D%/openzfs-libzpool6.install.in dist_noinst_DATA += %D%/openzfs-python3-pyzfs.install dist_noinst_DATA += %D%/openzfs-zfs-dkms.config dist_noinst_DATA += %D%/openzfs-zfs-dkms.dkms diff --git a/sys/contrib/openzfs/contrib/debian/clean b/sys/contrib/openzfs/contrib/debian/clean index 3100d693aeba..4f52d01b8108 100644 --- a/sys/contrib/openzfs/contrib/debian/clean +++ b/sys/contrib/openzfs/contrib/debian/clean @@ -6,6 +6,6 @@ contrib/pyzfs/libzfs_core/bindings/__pycache__/ contrib/pyzfs/pyzfs.egg-info/ debian/openzfs-libnvpair3.install debian/openzfs-libuutil3.install -debian/openzfs-libzfs4.install +debian/openzfs-libzfs6.install debian/openzfs-libzfs-dev.install -debian/openzfs-libzpool5.install +debian/openzfs-libzpool6.install diff --git a/sys/contrib/openzfs/contrib/debian/control b/sys/contrib/openzfs/contrib/debian/control index e56fbf0f1c93..6829c0ccdf93 100644 --- a/sys/contrib/openzfs/contrib/debian/control +++ b/sys/contrib/openzfs/contrib/debian/control @@ -78,9 +78,9 @@ Architecture: linux-any Depends: libssl-dev | libssl1.0-dev, openzfs-libnvpair3 (= ${binary:Version}), openzfs-libuutil3 (= ${binary:Version}), - openzfs-libzfs4 (= ${binary:Version}), + openzfs-libzfs6 (= ${binary:Version}), openzfs-libzfsbootenv1 (= ${binary:Version}), - openzfs-libzpool5 (= ${binary:Version}), + openzfs-libzpool6 (= ${binary:Version}), ${misc:Depends} Replaces: libzfslinux-dev Conflicts: libzfslinux-dev @@ -90,18 +90,18 @@ Description: OpenZFS filesystem development files for Linux libraries of OpenZFS filesystem. . This package includes the development files of libnvpair3, libuutil3, - libzpool5 and libzfs4. + libzpool6 and libzfs6. -Package: openzfs-libzfs4 +Package: openzfs-libzfs6 Section: contrib/libs Architecture: linux-any Depends: ${misc:Depends}, ${shlibs:Depends} # The libcurl4 is loaded through dlopen("libcurl.so.4"). # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521 Recommends: libcurl4 -Breaks: libzfs2, libzfs4 -Replaces: libzfs2, libzfs4, libzfs4linux -Conflicts: libzfs4linux +Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux +Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux +Conflicts: libzfs6linux Description: OpenZFS filesystem library for Linux - general support OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -123,13 +123,13 @@ Description: OpenZFS filesystem library for Linux - label info support . The zfsbootenv library provides support for modifying ZFS label information. -Package: openzfs-libzpool5 +Package: openzfs-libzpool6 Section: contrib/libs Architecture: linux-any Depends: ${misc:Depends}, ${shlibs:Depends} -Breaks: libzpool2, libzpool5 -Replaces: libzpool2, libzpool5, libzpool5linux -Conflicts: libzpool5linux +Breaks: libzpool2, libzpool5, libzpool5linux, libzpool6linux +Replaces: libzpool2, libzpool5, libzpool5linux, libzpool6linux +Conflicts: libzpool6linux Description: OpenZFS pool library for Linux OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -246,8 +246,8 @@ Architecture: linux-any Pre-Depends: ${misc:Pre-Depends} Depends: openzfs-libnvpair3 (= ${binary:Version}), openzfs-libuutil3 (= ${binary:Version}), - openzfs-libzfs4 (= ${binary:Version}), - openzfs-libzpool5 (= ${binary:Version}), + openzfs-libzfs6 (= ${binary:Version}), + openzfs-libzpool6 (= ${binary:Version}), python3, ${misc:Depends}, ${shlibs:Depends} diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.docs b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.docs index 4302f1b2ab6a..4302f1b2ab6a 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.docs +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.docs diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in index 6765aaee59cc..6765aaee59cc 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.docs b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.docs index 4302f1b2ab6a..4302f1b2ab6a 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.docs +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.docs diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in index b9e872df9ba8..b9e872df9ba8 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.install.in +++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install index d51e4ef003e6..546745930bff 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install @@ -98,6 +98,7 @@ usr/share/man/man8/zpool-attach.8 usr/share/man/man8/zpool-checkpoint.8 usr/share/man/man8/zpool-clear.8 usr/share/man/man8/zpool-create.8 +usr/share/man/man8/zpool-ddtprune.8 usr/share/man/man8/zpool-destroy.8 usr/share/man/man8/zpool-detach.8 usr/share/man/man8/zpool-ddtprune.8 @@ -113,6 +114,7 @@ usr/share/man/man8/zpool-list.8 usr/share/man/man8/zpool-offline.8 usr/share/man/man8/zpool-online.8 usr/share/man/man8/zpool-prefetch.8 +usr/share/man/man8/zpool-prefetch.8 usr/share/man/man8/zpool-reguid.8 usr/share/man/man8/zpool-remove.8 usr/share/man/man8/zpool-reopen.8 diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index 0a2bd2efda7a..c569b2528368 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -344,7 +344,7 @@ mount_fs() # Need the _original_ datasets mountpoint! mountpoint=$(get_fs_value "$fs" mountpoint) - ZFS_CMD="mount -o zfsutil -t zfs" + ZFS_CMD="mount.zfs -o zfsutil" if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then # Can't use the mountpoint property. Might be one of our # clones. Check the 'org.zol:mountpoint' property set in @@ -359,9 +359,8 @@ mount_fs() # isn't the root fs. return 0 fi - # Don't use mount.zfs -o zfsutils for legacy mountpoint if [ "$mountpoint" = "legacy" ]; then - ZFS_CMD="mount -t zfs" + ZFS_CMD="mount.zfs" fi # Last hail-mary: Hope 'rootmnt' is set! mountpoint="" diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h index e2108ceeaa44..f8712340cc5e 100644 --- a/sys/contrib/openzfs/include/libzutil.h +++ b/sys/contrib/openzfs/include/libzutil.h @@ -276,7 +276,11 @@ _LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv, * Thread-safe strerror() for use in ZFS libraries */ static inline char *zfs_strerror(int errnum) { +#ifdef HAVE_STRERROR_L return (strerror_l(errnum, uselocale(0))); +#else + return (strerror(errnum)); +#endif } #ifdef __cplusplus diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h index f63a397f293d..4cb3a055c3c4 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h @@ -38,8 +38,7 @@ #include <sys/rwlock.h> #include <sys/wait.h> #include <sys/wmsum.h> - -typedef struct kstat_s kstat_t; +#include <sys/kstat.h> #define TASKQ_NAMELEN 31 diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h index 606e8bf682e8..3eed968e90c0 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h @@ -30,6 +30,8 @@ extern "C" { #endif +struct abd; + struct abd_scatter { uint_t abd_offset; uint_t abd_nents; @@ -41,10 +43,8 @@ struct abd_linear { struct scatterlist *abd_sgl; /* for LINEAR_PAGE */ }; -typedef struct abd abd_t; - typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *); -int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *, +int abd_iterate_page_func(struct abd *, size_t, size_t, abd_iter_page_func_t *, void *); /* @@ -52,11 +52,11 @@ int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *, * Note: these are only needed to support vdev_classic. See comment in * vdev_disk.c. */ -unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t); -unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t); +unsigned int abd_bio_map_off(struct bio *, struct abd *, unsigned int, size_t); +unsigned long abd_nr_pages_off(struct abd *, unsigned int, size_t); __attribute__((malloc)) -abd_t *abd_alloc_from_pages(struct page **, unsigned long, uint64_t); +struct abd *abd_alloc_from_pages(struct page **, unsigned long, uint64_t); #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h index 7067eb17900d..30aa3a103d33 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h @@ -69,6 +69,7 @@ typedef struct vfs { boolean_t vfs_do_relatime; boolean_t vfs_nbmand; boolean_t vfs_do_nbmand; + kmutex_t vfs_mntpt_lock; } vfs_t; typedef struct zfs_mnt { diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h index 55b150c044ee..43d6e3f96ea3 100644 --- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h @@ -42,7 +42,8 @@ extern "C" { #define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_DELAY "delay" #define FM_EREPORT_ZFS_DEADMAN "deadman" -#define FM_EREPORT_ZFS_DIO_VERIFY "dio_verify" +#define FM_EREPORT_ZFS_DIO_VERIFY_WR "dio_verify_wr" +#define FM_EREPORT_ZFS_DIO_VERIFY_RD "dio_verify_rd" #define FM_EREPORT_ZFS_POOL "zpool" #define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" #define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz.h b/sys/contrib/openzfs/include/sys/vdev_raidz.h index a34bc00ca4df..64f484e9aa13 100644 --- a/sys/contrib/openzfs/include/sys/vdev_raidz.h +++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h @@ -57,7 +57,7 @@ void vdev_raidz_reconstruct(struct raidz_map *, const int *, int); void vdev_raidz_child_done(zio_t *); void vdev_raidz_io_done(zio_t *); void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *); -struct raidz_row *vdev_raidz_row_alloc(int); +struct raidz_row *vdev_raidz_row_alloc(int, zio_t *); void vdev_raidz_reflow_copy_scratch(spa_t *); void raidz_dtl_reassessed(vdev_t *); diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index f9409433e031..46f5d68aed4a 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -208,25 +208,25 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_PROBE (1ULL << 16) #define ZIO_FLAG_TRYHARD (1ULL << 17) #define ZIO_FLAG_OPTIONAL (1ULL << 18) - +#define ZIO_FLAG_DIO_READ (1ULL << 19) #define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1) /* * Flags not inherited by any children. */ -#define ZIO_FLAG_DONT_QUEUE (1ULL << 19) /* must be first for INHERIT */ -#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 20) -#define ZIO_FLAG_IO_BYPASS (1ULL << 21) -#define ZIO_FLAG_IO_REWRITE (1ULL << 22) -#define ZIO_FLAG_RAW_COMPRESS (1ULL << 23) -#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 24) -#define ZIO_FLAG_GANG_CHILD (1ULL << 25) -#define ZIO_FLAG_DDT_CHILD (1ULL << 26) -#define ZIO_FLAG_GODFATHER (1ULL << 27) -#define ZIO_FLAG_NOPWRITE (1ULL << 28) -#define ZIO_FLAG_REEXECUTED (1ULL << 29) -#define ZIO_FLAG_DELEGATED (1ULL << 30) -#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 31) +#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */ +#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21) +#define ZIO_FLAG_IO_BYPASS (1ULL << 22) +#define ZIO_FLAG_IO_REWRITE (1ULL << 23) +#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24) +#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25) +#define ZIO_FLAG_GANG_CHILD (1ULL << 26) +#define ZIO_FLAG_DDT_CHILD (1ULL << 27) +#define ZIO_FLAG_GODFATHER (1ULL << 28) +#define ZIO_FLAG_NOPWRITE (1ULL << 29) +#define ZIO_FLAG_REEXECUTED (1ULL << 30) +#define ZIO_FLAG_DELEGATED (1ULL << 31) +#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) @@ -647,6 +647,7 @@ extern void zio_vdev_io_redone(zio_t *zio); extern void zio_change_priority(zio_t *pio, zio_priority_t priority); extern void zio_checksum_verified(zio_t *zio); +extern void zio_dio_chksum_verify_error_report(zio_t *zio); extern int zio_worst_error(int e1, int e2); extern enum zio_checksum zio_checksum_select(enum zio_checksum child, diff --git a/sys/contrib/openzfs/lib/libspl/backtrace.c b/sys/contrib/openzfs/lib/libspl/backtrace.c index d26d742106e2..6e8b3b12122d 100644 --- a/sys/contrib/openzfs/lib/libspl/backtrace.c +++ b/sys/contrib/openzfs/lib/libspl/backtrace.c @@ -25,19 +25,32 @@ #include <sys/backtrace.h> #include <sys/types.h> +#include <sys/debug.h> #include <unistd.h> /* - * libspl_backtrace() must be safe to call from inside a signal hander. This - * mostly means it must not allocate, and so we can't use things like printf. + * Output helpers. libspl_backtrace() must not block, must be thread-safe and + * must be safe to call from a signal handler. At least, that means not having + * printf, so we end up having to call write() directly on the fd. That's + * awkward, as we always have to pass through a length, and some systems will + * complain if we don't consume the return. So we have some macros to make + * things a little more palatable. */ +#define spl_bt_write_n(fd, s, n) \ + do { ssize_t r __maybe_unused = write(fd, s, n); } while (0) +#define spl_bt_write(fd, s) spl_bt_write_n(fd, s, sizeof (s)) #if defined(HAVE_LIBUNWIND) #define UNW_LOCAL_ONLY #include <libunwind.h> +/* + * Convert `v` to ASCII hex characters. The bottom `n` nybbles (4-bits ie one + * hex digit) will be written, up to `buflen`. The buffer will not be + * null-terminated. Returns the number of digits written. + */ static size_t -libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen) +spl_bt_u64_to_hex_str(uint64_t v, size_t n, char *buf, size_t buflen) { static const char hexdigits[] = { '0', '1', '2', '3', '4', '5', '6', '7', @@ -45,10 +58,10 @@ libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen) }; size_t pos = 0; - boolean_t want = (digits == 0); + boolean_t want = (n == 0); for (int i = 15; i >= 0; i--) { const uint64_t d = v >> (i * 4) & 0xf; - if (!want && (d != 0 || digits > i)) + if (!want && (d != 0 || n > i)) want = B_TRUE; if (want) { buf[pos++] = hexdigits[d]; @@ -62,40 +75,181 @@ libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen) void libspl_backtrace(int fd) { - ssize_t ret __attribute__((unused)); unw_context_t uc; unw_cursor_t cp; - unw_word_t loc; + unw_word_t v; char buf[128]; size_t n; + int err; - ret = write(fd, "Call trace:\n", 12); + /* Snapshot the current frame and state. */ unw_getcontext(&uc); + + /* + * TODO: walk back to the frame that tripped the assertion / the place + * where the signal was recieved. + */ + + /* + * Register dump. We're going to loop over all the registers in the + * top frame, and show them, with names, in a nice three-column + * layout, which keeps us within 80 columns. + */ + spl_bt_write(fd, "Registers:\n"); + + /* Initialise a frame cursor, starting at the current frame */ unw_init_local(&cp, &uc); - while (unw_step(&cp) > 0) { - unw_get_reg(&cp, UNW_REG_IP, &loc); - ret = write(fd, " [0x", 5); - n = libspl_u64_to_hex_str(loc, 10, buf, sizeof (buf)); - ret = write(fd, buf, n); - ret = write(fd, "] ", 2); - unw_get_proc_name(&cp, buf, sizeof (buf), &loc); - for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {} - ret = write(fd, buf, n); - ret = write(fd, "+0x", 3); - n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf)); - ret = write(fd, buf, n); + + /* + * libunwind's list of possible registers for this architecture is an + * enum, unw_regnum_t. UNW_TDEP_LAST_REG is the highest-numbered + * register in that list, however, not all register numbers in this + * range are defined by the architecture, and not all defined registers + * will be present on every implementation of that architecture. + * Moreover, libunwind provides nice names for most, but not all + * registers, but these are hardcoded; a name being available does not + * mean that register is available. + * + * So, we have to pull this all together here. We try to get the value + * of every possible register. If we get a value for it, then the + * register must exist, and so we get its name. If libunwind has no + * name for it, we synthesize something. These cases should be rare, + * and they're usually for uninteresting or niche registers, so it + * shouldn't really matter. We can see the value, and that's the main + * thing. + */ + uint_t cols = 0; + for (uint_t regnum = 0; regnum <= UNW_TDEP_LAST_REG; regnum++) { + /* + * Get the value. Any error probably means the register + * doesn't exist, and we skip it. + */ + if (unw_get_reg(&cp, regnum, &v) < 0) + continue; + + /* + * Register name. If libunwind doesn't have a name for it, + * it will return "???". As a shortcut, we just treat '?' + * is an alternate end-of-string character. + */ + const char *name = unw_regname(regnum); + for (n = 0; name[n] != '\0' && name[n] != '?'; n++) {} + if (n == 0) { + /* + * No valid name, so make one of the form "?xx", where + * "xx" is the two-char hex of libunwind's register + * number. + */ + buf[0] = '?'; + n = spl_bt_u64_to_hex_str(regnum, 2, + &buf[1], sizeof (buf)-1) + 1; + name = buf; + } + + /* + * Two spaces of padding before each column, plus extra + * spaces to align register names shorter than three chars. + */ + spl_bt_write_n(fd, " ", 5-MIN(n, 3)); + + /* Register name and column punctuation */ + spl_bt_write_n(fd, name, n); + spl_bt_write(fd, ": 0x"); + + /* + * Convert register value (from unw_get_reg()) to hex. We're + * assuming that all registers are 64-bits wide, which is + * probably fine for any general-purpose registers on any + * machine currently in use. A more generic way would be to + * look at the width of unw_word_t, but that would also + * complicate the column code a bit. This is fine. + */ + n = spl_bt_u64_to_hex_str(v, 16, buf, sizeof (buf)); + spl_bt_write_n(fd, buf, n); + + /* Every third column, emit a newline */ + if (!(++cols % 3)) + spl_bt_write(fd, "\n"); + } + + /* If we finished before the third column, emit a newline. */ + if (cols % 3) + spl_bt_write(fd, "\n"); + + /* Now the main event, the backtrace. */ + spl_bt_write(fd, "Call trace:\n"); + + /* Reset the cursor to the top again. */ + unw_init_local(&cp, &uc); + + do { + /* + * Getting the IP should never fail; libunwind handles it + * specially, because its used a lot internally. Still, no + * point being silly about it, as the last thing we want is + * our crash handler to crash. So if it ever does fail, we'll + * show an error line, but keep going to the next frame. + */ + if (unw_get_reg(&cp, UNW_REG_IP, &v) < 0) { + spl_bt_write(fd, " [couldn't get IP register; " + "corrupt frame?]"); + continue; + } + + /* IP & punctuation */ + n = spl_bt_u64_to_hex_str(v, 16, buf, sizeof (buf)); + spl_bt_write(fd, " [0x"); + spl_bt_write_n(fd, buf, n); + spl_bt_write(fd, "] "); + + /* + * Function ("procedure") name for the current frame. `v` + * receives the offset from the named function to the IP, which + * we show as a "+offset" suffix. + * + * If libunwind can't determine the name, we just show "???" + * instead. We've already displayed the IP above; that will + * have to do. + * + * unw_get_proc_name() will return ENOMEM if the buffer is too + * small, instead truncating the name. So we treat that as a + * success and use whatever is in the buffer. + */ + err = unw_get_proc_name(&cp, buf, sizeof (buf), &v); + if (err == 0 || err == -UNW_ENOMEM) { + for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {} + spl_bt_write_n(fd, buf, n); + + /* Offset from proc name */ + spl_bt_write(fd, "+0x"); + n = spl_bt_u64_to_hex_str(v, 2, buf, sizeof (buf)); + spl_bt_write_n(fd, buf, n); + } else + spl_bt_write(fd, "???"); + #ifdef HAVE_LIBUNWIND_ELF - ret = write(fd, " (in ", 5); - unw_get_elf_filename(&cp, buf, sizeof (buf), &loc); - for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {} - ret = write(fd, buf, n); - ret = write(fd, " +0x", 4); - n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf)); - ret = write(fd, buf, n); - ret = write(fd, ")", 1); + /* + * Newer libunwind has unw_get_elf_filename(), which gets + * the name of the ELF object that the frame was executing in. + * Like `unw_get_proc_name()`, `v` recieves the offset within + * the file, and UNW_ENOMEM indicates that a truncate filename + * was left in the buffer. + */ + err = unw_get_elf_filename(&cp, buf, sizeof (buf), &v); + if (err == 0 || err == -UNW_ENOMEM) { + for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {} + spl_bt_write(fd, " (in "); + spl_bt_write_n(fd, buf, n); + + /* Offset within file */ + spl_bt_write(fd, " +0x"); + n = spl_bt_u64_to_hex_str(v, 2, buf, sizeof (buf)); + spl_bt_write_n(fd, buf, n); + spl_bt_write(fd, ")"); + } #endif - ret = write(fd, "\n", 1); - } + spl_bt_write(fd, "\n"); + } while (unw_step(&cp) > 0); } #elif defined(HAVE_BACKTRACE) #include <execinfo.h> @@ -103,15 +257,12 @@ libspl_backtrace(int fd) void libspl_backtrace(int fd) { - ssize_t ret __attribute__((unused)); void *btptrs[64]; size_t nptrs = backtrace(btptrs, 64); - ret = write(fd, "Call trace:\n", 12); + spl_bt_write(fd, "Call trace:\n"); backtrace_symbols_fd(btptrs, nptrs, fd); } #else -#include <sys/debug.h> - void libspl_backtrace(int fd __maybe_unused) { diff --git a/sys/contrib/openzfs/lib/libzfs/Makefile.am b/sys/contrib/openzfs/lib/libzfs/Makefile.am index a976faaf9913..5f8963dccd1a 100644 --- a/sys/contrib/openzfs/lib/libzfs/Makefile.am +++ b/sys/contrib/openzfs/lib/libzfs/Makefile.am @@ -70,7 +70,7 @@ if BUILD_FREEBSD libzfs_la_LIBADD += -lutil -lgeom endif -libzfs_la_LDFLAGS += -version-info 5:0:1 +libzfs_la_LDFLAGS += -version-info 6:0:0 pkgconfig_DATA += %D%/libzfs.pc diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 1a96460c2b84..ac9ae233c72d 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -1,4 +1,4 @@ -<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.4'> +<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.6'> <elf-needed> <dependency name='libzfs_core.so.3'/> <dependency name='libnvpair.so.3'/> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c index 14410b153130..44f2c6f19dff 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c @@ -2796,7 +2796,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) } /* - * With EBUSY, five cases are possible: + * With EBUSY, six cases are possible: * * Current state Requested * 1. Normal Scrub Running Normal Scrub or Error Scrub diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am index 397959c679e9..404b737c204d 100644 --- a/sys/contrib/openzfs/lib/libzpool/Makefile.am +++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am @@ -212,7 +212,7 @@ if BUILD_FREEBSD libzpool_la_LIBADD += -lgeom endif -libzpool_la_LDFLAGS += -version-info 5:0:0 +libzpool_la_LDFLAGS += -version-info 6:0:0 if TARGET_CPU_POWERPC module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.$(OBJEXT) : CFLAGS += -maltivec diff --git a/sys/contrib/openzfs/lib/libzpool/zfs_debug.c b/sys/contrib/openzfs/lib/libzpool/zfs_debug.c index df49a9a33fe8..82c7229932f0 100644 --- a/sys/contrib/openzfs/lib/libzpool/zfs_debug.c +++ b/sys/contrib/openzfs/lib/libzpool/zfs_debug.c @@ -35,9 +35,25 @@ typedef struct zfs_dbgmsg { static list_t zfs_dbgmsgs; static kmutex_t zfs_dbgmsgs_lock; +static uint_t zfs_dbgmsg_size = 0; +static uint_t zfs_dbgmsg_maxsize = 4<<20; /* 4MB */ int zfs_dbgmsg_enable = B_TRUE; +static void +zfs_dbgmsg_purge(uint_t max_size) +{ + while (zfs_dbgmsg_size > max_size) { + zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs); + if (zdm == NULL) + return; + + uint_t size = zdm->zdm_size; + kmem_free(zdm, size); + zfs_dbgmsg_size -= size; + } +} + void zfs_dbgmsg_init(void) { @@ -74,6 +90,8 @@ __zfs_dbgmsg(char *buf) mutex_enter(&zfs_dbgmsgs_lock); list_insert_tail(&zfs_dbgmsgs, zdm); + zfs_dbgmsg_size += size; + zfs_dbgmsg_purge(zfs_dbgmsg_maxsize); mutex_exit(&zfs_dbgmsgs_lock); } diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index e19573d5ee14..c9f6ed0dece3 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -436,7 +436,7 @@ write. It can also help to identify if reported checksum errors are tied to Direct I/O writes. Each verify error causes a -.Sy dio_verify +.Sy dio_verify_wr zevent. Direct Write I/O checkum verify errors can be seen with .Nm zpool Cm status Fl d . diff --git a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 b/sys/contrib/openzfs/man/man5/vdev_id.conf.5 index a2d38add4ee0..aaf91825a6c6 100644 --- a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 +++ b/sys/contrib/openzfs/man/man5/vdev_id.conf.5 @@ -92,6 +92,11 @@ before a generic mapping for the same slot. In this way a custom mapping may be applied to a particular channel and a default mapping applied to the others. . +.It Sy zpad_slot Ar digits +Pad slot numbers with zeros to make them +.Ar digits +long, which can help to make disk names a consistent length and easier to sort. +. .It Sy multipath Sy yes Ns | Ns Sy no Specifies whether .Xr vdev_id 8 @@ -122,7 +127,7 @@ device is connected to. The default is .Sy 4 . . -.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy ses +.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy bay_lun Ns | Ns Sy ses Specifies from which element of a SAS identifier the slot number is taken. The default is @@ -138,6 +143,9 @@ use the SAS port as the slot number. use the scsi id as the slot number. .It Sy lun use the scsi lun as the slot number. +.It Sy bay_lun +read the slot number from the bay identifier and append the lun number. +Useful for multi-lun multi-actuator hard drives. .It Sy ses use the SCSI Enclosure Services (SES) enclosure device slot number, as reported by diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8 index b49def08b72b..0fa7e9cc2613 100644 --- a/sys/contrib/openzfs/man/man8/zfs-list.8 +++ b/sys/contrib/openzfs/man/man8/zfs-list.8 @@ -71,7 +71,7 @@ The following fields are displayed: Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space. -.It Fl j Op Ar --json-int +.It Fl j , -json Op Ar --json-int Print the output in JSON format. Specify .Sy --json-int diff --git a/sys/contrib/openzfs/man/man8/zfs-mount.8 b/sys/contrib/openzfs/man/man8/zfs-mount.8 index 6116fbaab77f..921e2499e31b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-mount.8 +++ b/sys/contrib/openzfs/man/man8/zfs-mount.8 @@ -59,7 +59,7 @@ .Xc Displays all ZFS file systems currently mounted. .Bl -tag -width "-j" -.It Fl j +.It Fl j , -json Displays all mounted file systems in JSON format. .El .It Xo diff --git a/sys/contrib/openzfs/man/man8/zfs-program.8 b/sys/contrib/openzfs/man/man8/zfs-program.8 index 928620362be7..460cd2e11cf3 100644 --- a/sys/contrib/openzfs/man/man8/zfs-program.8 +++ b/sys/contrib/openzfs/man/man8/zfs-program.8 @@ -50,7 +50,7 @@ and any attempts to access or modify other pools will cause an error. . .Sh OPTIONS .Bl -tag -width "-t" -.It Fl j +.It Fl j , -json Display channel program output in JSON format. When this flag is specified and standard output is empty - channel program encountered an error. diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8 index 204450d72ec9..f1718608b44b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-set.8 +++ b/sys/contrib/openzfs/man/man8/zfs-set.8 @@ -130,7 +130,7 @@ The value can be used to display all properties that apply to the given dataset's type .Pq Sy filesystem , volume , snapshot , No or Sy bookmark . .Bl -tag -width "-s source" -.It Fl j Op Ar --json-int +.It Fl j , -json Op Ar --json-int Display the output in JSON format. Specify .Sy --json-int diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8 index 234612baea8d..01f849845bd9 100644 --- a/sys/contrib/openzfs/man/man8/zpool-events.8 +++ b/sys/contrib/openzfs/man/man8/zpool-events.8 @@ -98,7 +98,10 @@ This can be an indicator of problems with the underlying storage device. The number of delay events is ratelimited by the .Sy zfs_slow_io_events_per_second module parameter. -.It Sy dio_verify +.It Sy dio_verify_rd +Issued when there was a checksum verify error after a Direct I/O read has been +issued. +.It Sy dio_verify_wr Issued when there was a checksum verify error after a Direct I/O write has been issued. This event can only take place if the module parameter diff --git a/sys/contrib/openzfs/man/man8/zpool-get.8 b/sys/contrib/openzfs/man/man8/zpool-get.8 index 5384906f17f2..1be83526d22d 100644 --- a/sys/contrib/openzfs/man/man8/zpool-get.8 +++ b/sys/contrib/openzfs/man/man8/zpool-get.8 @@ -98,7 +98,7 @@ See the .Xr zpoolprops 7 manual page for more information on the available pool properties. .Bl -tag -compact -offset Ds -width "-o field" -.It Fl j Op Ar --json-int, --json-pool-key-guid +.It Fl j , -json Op Ar --json-int, --json-pool-key-guid Display the list of properties in JSON format. Specify .Sy --json-int @@ -157,7 +157,7 @@ See the .Xr vdevprops 7 manual page for more information on the available pool properties. .Bl -tag -compact -offset Ds -width "-o field" -.It Fl j Op Ar --json-int +.It Fl j , -json Op Ar --json-int Display the list of properties in JSON format. Specify .Sy --json-int diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8 index b0ee659701d4..6d3478a68711 100644 --- a/sys/contrib/openzfs/man/man8/zpool-list.8 +++ b/sys/contrib/openzfs/man/man8/zpool-list.8 @@ -59,7 +59,7 @@ is specified, the command exits after .Ar count reports are printed. .Bl -tag -width Ds -.It Fl j Op Ar --json-int, --json-pool-key-guid +.It Fl j , -json Op Ar --json-int, --json-pool-key-guid Display the list of pools in JSON format. Specify .Sy --json-int diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8 index 868fc4414dbb..b9b54185d050 100644 --- a/sys/contrib/openzfs/man/man8/zpool-status.8 +++ b/sys/contrib/openzfs/man/man8/zpool-status.8 @@ -70,7 +70,7 @@ See the option of .Nm zpool Cm iostat for complete details. -.It Fl j Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid +.It Fl j , -json Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid Display the status for ZFS pools in JSON format. Specify .Sy --json-int @@ -82,14 +82,18 @@ Specify .Sy --json-pool-key-guid to set pool GUID as key for pool objects instead of pool names. .It Fl d -Display the number of Direct I/O write checksum verify errors that have occured -on a top-level VDEV. +Display the number of Direct I/O read/write checksum verify errors that have +occured on a top-level VDEV. See .Sx zfs_vdev_direct_write_verify in .Xr zfs 4 for details about the conditions that can cause Direct I/O write checksum verify failures to occur. +Direct I/O reads checksum verify errors can also occur if the contents of the +buffer are being manipulated after the I/O has been issued and is in flight. +In the case of Direct I/O read checksum verify errors, the I/O will be reissued +through the ARC. .It Fl D Display a histogram of deduplication statistics, showing the allocated .Pq physically present on disk diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c index f20dc5d8c325..ab4f49a4ec5a 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c @@ -620,9 +620,16 @@ abd_borrow_buf_copy(abd_t *abd, size_t n) /* * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will - * no change the contents of the ABD and will ASSERT that you didn't modify - * the buffer since it was borrowed. If you want any changes you made to buf to - * be copied back to abd, use abd_return_buf_copy() instead. + * not change the contents of the ABD. If you want any changes you made to + * buf to be copied back to abd, use abd_return_buf_copy() instead. If the + * ABD is not constructed from user pages from Direct I/O then an ASSERT + * checks to make sure the contents of the buffer have not changed since it was + * borrowed. We can not ASSERT the contents of the buffer have not changed if + * it is composed of user pages. While Direct I/O write pages are placed under + * write protection and can not be changed, this is not the case for Direct I/O + * reads. The pages of a Direct I/O read could be manipulated at any time. + * Checksum verifications in the ZIO pipeline check for this issue and handle + * it by returning an error on checksum verification failure. */ void abd_return_buf(abd_t *abd, void *buf, size_t n) @@ -632,8 +639,34 @@ abd_return_buf(abd_t *abd, void *buf, size_t n) #ifdef ZFS_DEBUG (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); #endif - if (abd_is_linear(abd)) { + if (abd_is_from_pages(abd)) { + if (!abd_is_linear_page(abd)) + zio_buf_free(buf, n); + } else if (abd_is_linear(abd)) { ASSERT3P(buf, ==, abd_to_buf(abd)); + } else if (abd_is_gang(abd)) { +#ifdef ZFS_DEBUG + /* + * We have to be careful with gang ABD's that we do not ASSERT + * for any ABD's that contain user pages from Direct I/O. See + * the comment above about Direct I/O read buffers possibly + * being manipulated. In order to handle this, we jsut iterate + * through the gang ABD and only verify ABD's that are not from + * user pages. + */ + void *cmp_buf = buf; + + for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain); + cabd != NULL; + cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { + if (!abd_is_from_pages(cabd)) { + ASSERT0(abd_cmp_buf(cabd, cmp_buf, + cabd->abd_size)); + } + cmp_buf = (char *)cmp_buf + cabd->abd_size; + } +#endif + zio_buf_free(buf, n); } else { ASSERT0(abd_cmp_buf(abd, buf, n)); zio_buf_free(buf, n); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c index 03362b1ee860..303af48cf3af 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c @@ -1008,7 +1008,9 @@ abd_borrow_buf_copy(abd_t *abd, size_t n) * borrowed. We can not ASSERT that the contents of the buffer have not changed * if it is composed of user pages because the pages can not be placed under * write protection and the user could have possibly changed the contents in - * the pages at any time. + * the pages at any time. This is also an issue for Direct I/O reads. Checksum + * verifications in the ZIO pipeline check for this issue and handle it by + * returning an error on checksum verification failure. */ void abd_return_buf(abd_t *abd, void *buf, size_t n) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c index 8a42a075cd25..f60d6ae91e0b 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c @@ -767,9 +767,6 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, uint64_t id, pos = 0; int error = 0; - if (zfsvfs->z_vfs->vfs_mntpoint == NULL) - return (SET_ERROR(ENOENT)); - cookie = spl_fstrans_mark(); snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); @@ -786,8 +783,14 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, break; } - snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", - zfsvfs->z_vfs->vfs_mntpoint, snapname); + mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); + if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { + snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", + zfsvfs->z_vfs->vfs_mntpoint, snapname); + } else + error = SET_ERROR(ENOENT); + mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); + out: kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); spl_fstrans_unmark(cookie); @@ -1050,6 +1053,66 @@ exportfs_flush(void) } /* + * Returns the path in char format for given struct path. Uses + * d_path exported by kernel to convert struct path to char + * format. Returns the correct path for mountpoints and chroot + * environments. + * + * If chroot environment has directories that are mounted with + * --bind or --rbind flag, d_path returns the complete path inside + * chroot environment but does not return the absolute path, i.e. + * the path to chroot environment is missing. + */ +static int +get_root_path(struct path *path, char *buff, int len) +{ + char *path_buffer, *path_ptr; + int error = 0; + + path_get(path); + path_buffer = kmem_zalloc(len, KM_SLEEP); + path_ptr = d_path(path, path_buffer, len); + if (IS_ERR(path_ptr)) + error = SET_ERROR(-PTR_ERR(path_ptr)); + else + strcpy(buff, path_ptr); + + kmem_free(path_buffer, len); + path_put(path); + return (error); +} + +/* + * Returns if the current process root is chrooted or not. Linux + * kernel exposes the task_struct for current process and init. + * Since init process root points to actual root filesystem when + * Linux runtime is reached, we can compare the current process + * root with init process root to determine if root of the current + * process is different from init, which can reliably determine if + * current process is in chroot context or not. + */ +static int +is_current_chrooted(void) +{ + struct task_struct *curr = current, *global = &init_task; + struct path cr_root, gl_root; + + task_lock(curr); + get_fs_root(curr->fs, &cr_root); + task_unlock(curr); + + task_lock(global); + get_fs_root(global->fs, &gl_root); + task_unlock(global); + + int chrooted = !path_equal(&cr_root, &gl_root); + path_put(&gl_root); + path_put(&cr_root); + + return (chrooted); +} + +/* * Attempt to unmount a snapshot by making a call to user space. * There is no assurance that this can or will succeed, is just a * best effort. In the case where it does fail, perhaps because @@ -1123,14 +1186,50 @@ zfsctl_snapshot_mount(struct path *path, int flags) if (error) goto error; + if (is_current_chrooted() == 0) { + /* + * Current process is not in chroot context + */ + + char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + struct path mnt_path; + mnt_path.mnt = path->mnt; + mnt_path.dentry = path->mnt->mnt_root; + + /* + * Get path to current mountpoint + */ + error = get_root_path(&mnt_path, m, MAXPATHLEN); + if (error != 0) { + kmem_free(m, MAXPATHLEN); + goto error; + } + mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); + if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { + /* + * If current mnountpoint and vfs_mntpoint are not same, + * store current mountpoint in vfs_mntpoint. + */ + if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) { + kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint); + zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); + } + } else + zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); + mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); + kmem_free(m, MAXPATHLEN); + } + /* * Construct a mount point path from sb of the ctldir inode and dirent * name, instead of from d_path(), so that chroot'd process doesn't fail * on mount.zfs(8). */ + mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", dname(dentry)); + mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); snprintf(options, 7, "%s", zfs_snapshot_no_setuid ? "nosuid" : "suid"); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index de3e8c89cfdd..3c53a8a315c3 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -115,7 +115,7 @@ zfsvfs_vfs_free(vfs_t *vfsp) if (vfsp != NULL) { if (vfsp->vfs_mntpoint != NULL) kmem_strfree(vfsp->vfs_mntpoint); - + mutex_destroy(&vfsp->vfs_mntpt_lock); kmem_free(vfsp, sizeof (vfs_t)); } } @@ -197,10 +197,11 @@ zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp) vfsp->vfs_do_nbmand = B_TRUE; break; case TOKEN_MNTPOINT: + if (vfsp->vfs_mntpoint != NULL) + kmem_strfree(vfsp->vfs_mntpoint); vfsp->vfs_mntpoint = match_strdup(&args[0]); if (vfsp->vfs_mntpoint == NULL) return (SET_ERROR(ENOMEM)); - break; default: break; @@ -219,6 +220,7 @@ zfsvfs_parse_options(char *mntopts, vfs_t **vfsp) int error; tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP); + mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL); if (mntopts != NULL) { substring_t args[MAX_OPT_ARGS]; diff --git a/sys/contrib/openzfs/module/zcommon/zfs_valstr.c b/sys/contrib/openzfs/module/zcommon/zfs_valstr.c index 622323bbbd5f..43bccea14a85 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_valstr.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_valstr.c @@ -206,6 +206,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag, { '.', "PR", "PROBE" }, { '.', "TH", "TRYHARD" }, { '.', "OP", "OPTIONAL" }, + { '.', "RD", "DIO_READ" }, { '.', "DQ", "DONT_QUEUE" }, { '.', "DP", "DONT_PROPAGATE" }, { '.', "BY", "IO_BYPASS" }, diff --git a/sys/contrib/openzfs/module/zfs/dmu_direct.c b/sys/contrib/openzfs/module/zfs/dmu_direct.c index 91a7fd8df464..ed96e7515bc7 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_direct.c +++ b/sys/contrib/openzfs/module/zfs/dmu_direct.c @@ -330,7 +330,7 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size, */ zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size, dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ, - ZIO_FLAG_CANFAIL, &zb); + ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_READ, &zb); mutex_exit(&db->db_mtx); zfs_racct_read(spa, db->db.db_size, 1, flags); diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index 6a9ed891093b..2248f644bee7 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -2987,6 +2987,7 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, dsl_dataset_t *ds; uint64_t val; dmu_tx_t *tx = ddrsa->ddrsa_tx; + char *oldname, *newname; int error; error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); @@ -3011,8 +3012,14 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, VERIFY0(zap_add(dp->dp_meta_objset, dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); - zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname, - ddrsa->ddrsa_newsnapname, B_TRUE); + + oldname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname, + ddrsa->ddrsa_oldsnapname); + newname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname, + ddrsa->ddrsa_newsnapname); + zvol_rename_minors(dp->dp_spa, oldname, newname, B_TRUE); + kmem_strfree(oldname); + kmem_strfree(newname); dsl_dataset_rele(ds, FTAG); return (0); diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c index 13bb33cc6871..419c8ac5bb28 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_draid.c +++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c @@ -1026,7 +1026,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset, ASSERT3U(vdc->vdc_nparity, >, 0); - raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth); + raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth, zio); rr->rr_bigcols = bc; rr->rr_firstdatacol = vdc->vdc_nparity; #ifdef ZFS_DEBUG diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c index acb725696674..e3dba0257b21 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c @@ -34,6 +34,7 @@ #include <sys/zap.h> #include <sys/abd.h> #include <sys/zthr.h> +#include <sys/fm/fs/zfs.h> /* * An indirect vdev corresponds to a vdev that has been removed. Since @@ -1832,6 +1833,19 @@ vdev_indirect_io_done(zio_t *zio) zio_bad_cksum_t zbc; int ret = zio_checksum_error(zio, &zbc); + /* + * Any Direct I/O read that has a checksum error must be treated as + * suspicious as the contents of the buffer could be getting + * manipulated while the I/O is taking place. The checksum verify error + * will be reported to the top-level VDEV. + */ + if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) { + zio->io_error = ret; + zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio_dio_chksum_verify_error_report(zio); + ret = 0; + } + if (ret == 0) { zio_checksum_verified(zio); return; diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c index 102eacb03349..65a840bf9728 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c +++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c @@ -765,6 +765,27 @@ vdev_mirror_io_done(zio_t *zio) ASSERT(zio->io_type == ZIO_TYPE_READ); /* + * Any Direct I/O read that has a checksum error must be treated as + * suspicious as the contents of the buffer could be getting + * manipulated while the I/O is taking place. The checksum verify error + * will be reported to the top-level Mirror VDEV. + * + * There will be no attampt at reading any additional data copies. If + * the buffer is still being manipulated while attempting to read from + * another child, there exists a possibly that the checksum could be + * verified as valid. However, the buffer contents could again get + * manipulated after verifying the checksum. This would lead to bad data + * being written out during self healing. + */ + if ((zio->io_flags & ZIO_FLAG_DIO_READ) && + (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { + zio_dio_chksum_verify_error_report(zio); + zio->io_error = vdev_mirror_worst_error(mm); + ASSERT3U(zio->io_error, ==, ECKSUM); + return; + } + + /* * If we don't have a good copy yet, keep trying other children. */ if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index 15c8b8ca6016..5e330626be2b 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -433,7 +433,7 @@ const zio_vsd_ops_t vdev_raidz_vsd_ops = { }; raidz_row_t * -vdev_raidz_row_alloc(int cols) +vdev_raidz_row_alloc(int cols, zio_t *zio) { raidz_row_t *rr = kmem_zalloc(offsetof(raidz_row_t, rr_col[cols]), KM_SLEEP); @@ -445,7 +445,17 @@ vdev_raidz_row_alloc(int cols) raidz_col_t *rc = &rr->rr_col[c]; rc->rc_shadow_devidx = INT_MAX; rc->rc_shadow_offset = UINT64_MAX; - rc->rc_allow_repair = 1; + /* + * We can not allow self healing to take place for Direct I/O + * reads. There is nothing that stops the buffer contents from + * being manipulated while the I/O is in flight. It is possible + * that the checksum could be verified on the buffer and then + * the contents of that buffer are manipulated afterwards. This + * could lead to bad data being written out during self + * healing. + */ + if (!(zio->io_flags & ZIO_FLAG_DIO_READ)) + rc->rc_allow_repair = 1; } return (rr); } @@ -619,7 +629,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, } ASSERT3U(acols, <=, scols); - rr = vdev_raidz_row_alloc(scols); + rr = vdev_raidz_row_alloc(scols, zio); rm->rm_row[0] = rr; rr->rr_cols = acols; rr->rr_bigcols = bc; @@ -765,7 +775,7 @@ vdev_raidz_map_alloc_expanded(zio_t *zio, for (uint64_t row = 0; row < rows; row++) { boolean_t row_use_scratch = B_FALSE; - raidz_row_t *rr = vdev_raidz_row_alloc(cols); + raidz_row_t *rr = vdev_raidz_row_alloc(cols, zio); rm->rm_row[row] = rr; /* The starting RAIDZ (parent) vdev sector of the row. */ @@ -2633,6 +2643,20 @@ raidz_checksum_verify(zio_t *zio) raidz_map_t *rm = zio->io_vsd; int ret = zio_checksum_error(zio, &zbc); + /* + * Any Direct I/O read that has a checksum error must be treated as + * suspicious as the contents of the buffer could be getting + * manipulated while the I/O is taking place. The checksum verify error + * will be reported to the top-level RAIDZ VDEV. + */ + if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) { + zio->io_error = ret; + zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio_dio_chksum_verify_error_report(zio); + zio_checksum_verified(zio); + return (0); + } + if (ret != 0 && zbc.zbc_injected != 0) rm->rm_ecksuminjected = 1; @@ -2776,6 +2800,11 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr) (rc->rc_error == 0 || rc->rc_size == 0)) { continue; } + /* + * We do not allow self healing for Direct I/O reads. + * See comment in vdev_raid_row_alloc(). + */ + ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ); zfs_dbgmsg("zio=%px repairing c=%u devidx=%u " "offset=%llx", @@ -2979,6 +3008,8 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity) /* Check for success */ if (raidz_checksum_verify(zio) == 0) { + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) + return (0); /* Reconstruction succeeded - report errors */ for (int i = 0; i < rm->rm_nrows; i++) { @@ -3379,7 +3410,6 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio) zio_bad_cksum_t zbc; zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - mutex_enter(&cvd->vdev_stat_lock); cvd->vdev_stat.vs_checksum_errors++; mutex_exit(&cvd->vdev_stat_lock); @@ -3444,6 +3474,9 @@ vdev_raidz_io_done(zio_t *zio) } if (raidz_checksum_verify(zio) == 0) { + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) + goto done; + for (int i = 0; i < rm->rm_nrows; i++) { raidz_row_t *rr = rm->rm_row[i]; vdev_raidz_io_done_verified(zio, rr); @@ -3538,6 +3571,7 @@ vdev_raidz_io_done(zio_t *zio) } } } +done: if (rm->rm_lr != NULL) { zfs_rangelock_exit(rm->rm_lr); rm->rm_lr = NULL; diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index a96f508ffac0..0799f17758bf 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -77,8 +77,15 @@ static int zfs_bclone_wait_dirty = 0; * Enable Direct I/O. If this setting is 0, then all I/O requests will be * directed through the ARC acting as though the dataset property direct was * set to disabled. + * + * Disabled by default on FreeBSD until a potential range locking issue in + * zfs_getpages() can be resolved. */ +#ifdef __FreeBSD__ static int zfs_dio_enabled = 0; +#else +static int zfs_dio_enabled = 1; +#endif /* @@ -303,6 +310,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) (void) cr; int error = 0; boolean_t frsync = B_FALSE; + boolean_t dio_checksum_failure = B_FALSE; zfsvfs_t *zfsvfs = ZTOZSB(zp); if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) @@ -424,8 +432,26 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) if (error) { /* convert checksum errors into IO errors */ - if (error == ECKSUM) - error = SET_ERROR(EIO); + if (error == ECKSUM) { + /* + * If a Direct I/O read returned a checksum + * verify error, then it must be treated as + * suspicious. The contents of the buffer could + * have beeen manipulated while the I/O was in + * flight. In this case, the remainder of I/O + * request will just be reissued through the + * ARC. + */ + if (uio->uio_extflg & UIO_DIRECT) { + dio_checksum_failure = B_TRUE; + uio->uio_extflg &= ~UIO_DIRECT; + n += dio_remaining_resid; + dio_remaining_resid = 0; + continue; + } else { + error = SET_ERROR(EIO); + } + } #if defined(__linux__) /* @@ -472,6 +498,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) out: zfs_rangelock_exit(lr); + if (dio_checksum_failure == B_TRUE) + uio->uio_extflg |= UIO_DIRECT; + /* * Cleanup for Direct I/O if requested. */ diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index b26f5e80abfb..a5daf73d59ba 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -804,11 +804,11 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait, pio->io_reexecute |= zio->io_reexecute; ASSERT3U(*countp, >, 0); - if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { - ASSERT3U(*errorp, ==, EIO); - ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + /* + * Propogate the Direct I/O checksum verify failure to the parent. + */ + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; - } (*countp)--; @@ -1573,6 +1573,14 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, */ pipeline |= ZIO_STAGE_CHECKSUM_VERIFY; pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; + /* + * We never allow the mirror VDEV to attempt reading from any + * additional data copies after the first Direct I/O checksum + * verify failure. This is to avoid bad data being written out + * through the mirror during self healing. See comment in + * vdev_mirror_io_done() for more details. + */ + ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR); } else if (type == ZIO_TYPE_WRITE && pio->io_prop.zp_direct_write == B_TRUE) { /* @@ -4555,18 +4563,18 @@ zio_vdev_io_assess(zio_t *zio) } /* - * If a Direct I/O write checksum verify error has occurred then this - * I/O should not attempt to be issued again. Instead the EIO will - * be returned. + * If a Direct I/O operation has a checksum verify error then this I/O + * should not attempt to be issued again. */ if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { - ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL); - ASSERT3U(zio->io_error, ==, EIO); + if (zio->io_type == ZIO_TYPE_WRITE) { + ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL); + ASSERT3U(zio->io_error, ==, EIO); + } zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; return (zio); } - if (zio_injection_enabled && zio->io_error == 0) zio->io_error = zio_handle_fault_injection(zio, EIO); @@ -4864,16 +4872,40 @@ zio_checksum_verify(zio_t *zio) ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL); } + ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR); + IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ, + !(zio->io_flags & ZIO_FLAG_SPECULATIVE)); + if ((error = zio_checksum_error(zio, &info)) != 0) { zio->io_error = error; if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { - mutex_enter(&zio->io_vd->vdev_stat_lock); - zio->io_vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&zio->io_vd->vdev_stat_lock); - (void) zfs_ereport_start_checksum(zio->io_spa, - zio->io_vd, &zio->io_bookmark, zio, - zio->io_offset, zio->io_size, &info); + if (zio->io_flags & ZIO_FLAG_DIO_READ) { + zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + zio_t *pio = zio_unique_parent(zio); + /* + * Any Direct I/O read that has a checksum + * error must be treated as suspicous as the + * contents of the buffer could be getting + * manipulated while the I/O is taking place. + * + * The checksum verify error will only be + * reported here for disk and file VDEV's and + * will be reported on those that the failure + * occurred on. Other types of VDEV's report the + * verify failure in their own code paths. + */ + if (pio->io_child_type == ZIO_CHILD_LOGICAL) { + zio_dio_chksum_verify_error_report(zio); + } + } else { + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + (void) zfs_ereport_start_checksum(zio->io_spa, + zio->io_vd, &zio->io_bookmark, zio, + zio->io_offset, zio->io_size, &info); + } } } @@ -4899,22 +4931,8 @@ zio_dio_checksum_verify(zio_t *zio) if ((error = zio_checksum_error(zio, NULL)) != 0) { zio->io_error = error; if (error == ECKSUM) { - mutex_enter(&zio->io_vd->vdev_stat_lock); - zio->io_vd->vdev_stat.vs_dio_verify_errors++; - mutex_exit(&zio->io_vd->vdev_stat_lock); - zio->io_error = SET_ERROR(EIO); zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; - - /* - * The EIO error must be propagated up to the logical - * parent ZIO in zio_notify_parent() so it can be - * returned to dmu_write_abd(). - */ - zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE; - - (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY, - zio->io_spa, zio->io_vd, &zio->io_bookmark, - zio, 0); + zio_dio_chksum_verify_error_report(zio); } } @@ -4933,6 +4951,39 @@ zio_checksum_verified(zio_t *zio) } /* + * Report Direct I/O checksum verify error and create ZED event. + */ +void +zio_dio_chksum_verify_error_report(zio_t *zio) +{ + ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR); + + if (zio->io_child_type == ZIO_CHILD_LOGICAL) + return; + + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_dio_verify_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + if (zio->io_type == ZIO_TYPE_WRITE) { + /* + * Convert checksum error for writes into EIO. + */ + zio->io_error = SET_ERROR(EIO); + /* + * Report dio_verify_wr ZED event. + */ + (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_WR, + zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0); + } else { + /* + * Report dio_verify_rd ZED event. + */ + (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_RD, + zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0); + } +} + +/* * ========================================================================== * Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other. * An error of 0 indicates success. ENXIO indicates whole-device failure, @@ -5343,10 +5394,9 @@ zio_done(zio_t *zio) if (zio->io_reexecute) { /* - * A Direct I/O write that has a checksum verify error should - * not attempt to reexecute. Instead, EAGAIN should just be - * propagated back up so the write can be attempt to be issued - * through the ARC. + * A Direct I/O operation that has a checksum verify error + * should not attempt to reexecute. Instead, the error should + * just be propagated back. */ ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)); diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in index c7a00c61f6bb..d0d850af2629 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in @@ -99,10 +99,10 @@ License: @ZFS_META_LICENSE@ URL: https://github.com/openzfs/zfs Source0: %{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -Requires: libzpool5%{?_isa} = %{version}-%{release} +Requires: libzpool6%{?_isa} = %{version}-%{release} Requires: libnvpair3%{?_isa} = %{version}-%{release} Requires: libuutil3%{?_isa} = %{version}-%{release} -Requires: libzfs5%{?_isa} = %{version}-%{release} +Requires: libzfs6%{?_isa} = %{version}-%{release} Requires: %{name}-kmod = %{version} Provides: %{name}-kmod-common = %{version}-%{release} Obsoletes: spl <= %{version} @@ -150,21 +150,22 @@ Requires: sysstat %description This package contains the core ZFS command line utilities. -%package -n libzpool5 +%package -n libzpool6 Summary: Native ZFS pool library for Linux Group: System Environment/Kernel Obsoletes: libzpool2 <= %{version} Obsoletes: libzpool4 <= %{version} +Obsoletes: libzpool5 <= %{version} -%description -n libzpool5 +%description -n libzpool6 This package contains the zpool library, which provides support for managing zpools %if %{defined ldconfig_scriptlets} -%ldconfig_scriptlets -n libzpool5 +%ldconfig_scriptlets -n libzpool6 %else -%post -n libzpool5 -p /sbin/ldconfig -%postun -n libzpool5 -p /sbin/ldconfig +%post -n libzpool6 -p /sbin/ldconfig +%postun -n libzpool6 -p /sbin/ldconfig %endif %package -n libnvpair3 @@ -211,37 +212,39 @@ This library provides a variety of compatibility functions for OpenZFS: # The library version is encoded in the package name. When updating the # version information it is important to add an obsoletes line below for # the previous version of the package. -%package -n libzfs5 +%package -n libzfs6 Summary: Native ZFS filesystem library for Linux Group: System Environment/Kernel Obsoletes: libzfs2 <= %{version} Obsoletes: libzfs4 <= %{version} +Obsoletes: libzfs5 <= %{version} -%description -n libzfs5 +%description -n libzfs6 This package provides support for managing ZFS filesystems %if %{defined ldconfig_scriptlets} -%ldconfig_scriptlets -n libzfs5 +%ldconfig_scriptlets -n libzfs6 %else -%post -n libzfs5 -p /sbin/ldconfig -%postun -n libzfs5 -p /sbin/ldconfig +%post -n libzfs6 -p /sbin/ldconfig +%postun -n libzfs6 -p /sbin/ldconfig %endif -%package -n libzfs5-devel +%package -n libzfs6-devel Summary: Development headers Group: System Environment/Kernel -Requires: libzfs5%{?_isa} = %{version}-%{release} -Requires: libzpool5%{?_isa} = %{version}-%{release} +Requires: libzfs6%{?_isa} = %{version}-%{release} +Requires: libzpool6%{?_isa} = %{version}-%{release} Requires: libnvpair3%{?_isa} = %{version}-%{release} Requires: libuutil3%{?_isa} = %{version}-%{release} -Provides: libzpool5-devel = %{version}-%{release} +Provides: libzpool6-devel = %{version}-%{release} Provides: libnvpair3-devel = %{version}-%{release} Provides: libuutil3-devel = %{version}-%{release} Obsoletes: zfs-devel <= %{version} Obsoletes: libzfs2-devel <= %{version} Obsoletes: libzfs4-devel <= %{version} +Obsoletes: libzfs5-devel <= %{version} -%description -n libzfs5-devel +%description -n libzfs6-devel This package contains the header files needed for building additional applications against the ZFS libraries. @@ -290,7 +293,7 @@ Summary: Python %{python_version} wrapper for libzfs_core Group: Development/Languages/Python License: Apache-2.0 BuildArch: noarch -Requires: libzfs5 = %{version}-%{release} +Requires: libzfs6 = %{version}-%{release} Requires: libnvpair3 = %{version}-%{release} Requires: libffi Requires: python%{__python_pkg_version} @@ -534,7 +537,7 @@ systemctl --system daemon-reload >/dev/null || true %config(noreplace) %{_bashcompletiondir}/zfs %config(noreplace) %{_bashcompletiondir}/zpool -%files -n libzpool5 +%files -n libzpool6 %{_libdir}/libzpool.so.* %files -n libnvpair3 @@ -543,10 +546,10 @@ systemctl --system daemon-reload >/dev/null || true %files -n libuutil3 %{_libdir}/libuutil.so.* -%files -n libzfs5 +%files -n libzfs6 %{_libdir}/libzfs*.so.* -%files -n libzfs5-devel +%files -n libzfs6-devel %{_pkgconfigdir}/libzfs.pc %{_pkgconfigdir}/libzfsbootenv.pc %{_pkgconfigdir}/libzfs_core.pc diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index f89a4b3e0aae..fc4adc42d00a 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -697,8 +697,8 @@ tags = ['functional', 'delegate'] tests = ['dio_aligned_block', 'dio_async_always', 'dio_async_fio_ioengines', 'dio_compression', 'dio_dedup', 'dio_encryption', 'dio_grow_block', 'dio_max_recordsize', 'dio_mixed', 'dio_mmap', 'dio_overwrites', - 'dio_property', 'dio_random', 'dio_recordsize', 'dio_unaligned_block', - 'dio_unaligned_filesize'] + 'dio_property', 'dio_random', 'dio_read_verify', 'dio_recordsize', + 'dio_unaligned_block', 'dio_unaligned_filesize'] tags = ['functional', 'direct'] [tests/functional/exec] diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c index 714f42200557..173581094443 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2022 by Triad National Security, LLC. + * Copyright (c) 2024 by Triad National Security, LLC. */ #include <sys/types.h> @@ -39,51 +39,59 @@ #define MIN(a, b) ((a) < (b)) ? (a) : (b) #endif -static char *outputfile = NULL; +static char *filename = NULL; static int blocksize = 131072; /* 128K */ -static int wr_err_expected = 0; +static int err_expected = 0; +static int read_op = 0; +static int write_op = 0; static int numblocks = 100; static char *execname = NULL; static int print_usage = 0; static int randompattern = 0; -static int ofd; +static int fd; char *buf = NULL; typedef struct { - int entire_file_written; + int entire_file_completed; } pthread_args_t; static void usage(void) { (void) fprintf(stderr, - "usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n" - " [-n numblocks] [-p randpattern] [-h help]\n" + "usage %s -f filename [-b blocksize] [-e wr_error_expected]\n" + " [-n numblocks] [-p randompattern] -r read_op \n" + " -w write_op [-h help]\n" "\n" "Testing whether checksum verify works correctly for O_DIRECT.\n" "when manipulating the contents of a userspace buffer.\n" "\n" - " outputfile: File to write to.\n" - " blocksize: Size of each block to write (must be at \n" - " least >= 512).\n" - " wr_err_expected: Whether pwrite() is expected to return EIO\n" - " while manipulating the contents of the\n" - " buffer.\n" - " numblocks: Total number of blocksized blocks to\n" - " write.\n" - " randpattern: Fill data buffer with random data. Default\n" - " behavior is to fill the buffer with the \n" - " known data pattern (0xdeadbeef).\n" + " filename: File to read or write to.\n" + " blocksize: Size of each block to write (must be at \n" + " least >= 512).\n" + " err_expected: Whether write() is expected to return EIO\n" + " while manipulating the contents of the\n" + " buffer.\n" + " numblocks: Total number of blocksized blocks to\n" + " write.\n" + " read_op: Perform reads to the filename file while\n" + " while manipulating the buffer contents\n" + " write_op: Perform writes to the filename file while\n" + " manipulating the buffer contents\n" + " randompattern: Fill data buffer with random data for \n" + " writes. Default behavior is to fill the \n" + " buffer with known data pattern (0xdeadbeef)\n" " help: Print usage information and exit.\n" "\n" " Required parameters:\n" - " outputfile\n" + " filename\n" + " read_op or write_op\n" "\n" " Default Values:\n" " blocksize -> 131072\n" " wr_err_expexted -> false\n" " numblocks -> 100\n" - " randpattern -> false\n", + " randompattern -> false\n", execname); (void) exit(1); } @@ -97,16 +105,21 @@ parse_options(int argc, char *argv[]) extern int optind, optopt; execname = argv[0]; - while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) { + while ((c = getopt(argc, argv, "b:ef:hn:rw")) != -1) { switch (c) { case 'b': blocksize = atoi(optarg); break; case 'e': - wr_err_expected = 1; + err_expected = 1; break; + case 'f': + filename = optarg; + break; + + case 'h': print_usage = 1; break; @@ -115,12 +128,12 @@ parse_options(int argc, char *argv[]) numblocks = atoi(optarg); break; - case 'o': - outputfile = optarg; + case 'r': + read_op = 1; break; - case 'p': - randompattern = 1; + case 'w': + write_op = 1; break; case ':': @@ -141,7 +154,8 @@ parse_options(int argc, char *argv[]) if (errflag || print_usage == 1) (void) usage(); - if (blocksize < 512 || outputfile == NULL || numblocks <= 0) { + if (blocksize < 512 || filename == NULL || numblocks <= 0 || + (read_op == 0 && write_op == 0)) { (void) fprintf(stderr, "Required paramater(s) missing or invalid.\n"); (void) usage(); @@ -160,10 +174,10 @@ write_thread(void *arg) ssize_t wrote = 0; pthread_args_t *args = (pthread_args_t *)arg; - while (!args->entire_file_written) { - wrote = pwrite(ofd, buf, blocksize, offset); + while (!args->entire_file_completed) { + wrote = pwrite(fd, buf, blocksize, offset); if (wrote != blocksize) { - if (wr_err_expected) + if (err_expected) assert(errno == EIO); else exit(2); @@ -173,7 +187,35 @@ write_thread(void *arg) left -= blocksize; if (left == 0) - args->entire_file_written = 1; + args->entire_file_completed = 1; + } + + pthread_exit(NULL); +} + +/* + * Read blocksize * numblocks to the file using O_DIRECT. + */ +static void * +read_thread(void *arg) +{ + size_t offset = 0; + int total_data = blocksize * numblocks; + int left = total_data; + ssize_t read = 0; + pthread_args_t *args = (pthread_args_t *)arg; + + while (!args->entire_file_completed) { + read = pread(fd, buf, blocksize, offset); + if (read != blocksize) { + exit(2); + } + + offset = ((offset + blocksize) % total_data); + left -= blocksize; + + if (left == 0) + args->entire_file_completed = 1; } pthread_exit(NULL); @@ -189,7 +231,7 @@ manipulate_buf_thread(void *arg) char rand_char; pthread_args_t *args = (pthread_args_t *)arg; - while (!args->entire_file_written) { + while (!args->entire_file_completed) { rand_offset = (rand() % blocksize); rand_char = (rand() % (126 - 33) + 33); buf[rand_offset] = rand_char; @@ -202,9 +244,9 @@ int main(int argc, char *argv[]) { const char *datapattern = "0xdeadbeef"; - int ofd_flags = O_WRONLY | O_CREAT | O_DIRECT; + int fd_flags = O_DIRECT; mode_t mode = S_IRUSR | S_IWUSR; - pthread_t write_thr; + pthread_t io_thr; pthread_t manipul_thr; int left = blocksize; int offset = 0; @@ -213,9 +255,15 @@ main(int argc, char *argv[]) parse_options(argc, argv); - ofd = open(outputfile, ofd_flags, mode); - if (ofd == -1) { - (void) fprintf(stderr, "%s, %s\n", execname, outputfile); + if (write_op) { + fd_flags |= (O_WRONLY | O_CREAT); + } else { + fd_flags |= O_RDONLY; + } + + fd = open(filename, fd_flags, mode); + if (fd == -1) { + (void) fprintf(stderr, "%s, %s\n", execname, filename); perror("open"); exit(2); } @@ -228,24 +276,22 @@ main(int argc, char *argv[]) exit(2); } - if (!randompattern) { - /* Putting known data pattern in buffer */ - while (left) { - size_t amt = MIN(strlen(datapattern), left); - memcpy(&buf[offset], datapattern, amt); - offset += amt; - left -= amt; + if (write_op) { + if (!randompattern) { + /* Putting known data pattern in buffer */ + while (left) { + size_t amt = MIN(strlen(datapattern), left); + memcpy(&buf[offset], datapattern, amt); + offset += amt; + left -= amt; + } + } else { + /* Putting random data in buffer */ + for (int i = 0; i < blocksize; i++) + buf[i] = rand(); } - } else { - /* Putting random data in buffer */ - for (int i = 0; i < blocksize; i++) - buf[i] = rand(); } - /* - * Writing using O_DIRECT while manipulating the buffer contents until - * the entire file is written. - */ if ((rc = pthread_create(&manipul_thr, NULL, manipulate_buf_thread, &args))) { fprintf(stderr, "error: pthreads_create, manipul_thr, " @@ -253,18 +299,34 @@ main(int argc, char *argv[]) exit(2); } - if ((rc = pthread_create(&write_thr, NULL, write_thread, &args))) { - fprintf(stderr, "error: pthreads_create, write_thr, " - "rc: %d\n", rc); - exit(2); + if (write_op) { + /* + * Writing using O_DIRECT while manipulating the buffer contents + * until the entire file is written. + */ + if ((rc = pthread_create(&io_thr, NULL, write_thread, &args))) { + fprintf(stderr, "error: pthreads_create, io_thr, " + "rc: %d\n", rc); + exit(2); + } + } else { + /* + * Reading using O_DIRECT while manipulating the buffer contents + * until the entire file is read. + */ + if ((rc = pthread_create(&io_thr, NULL, read_thread, &args))) { + fprintf(stderr, "error: pthreads_create, io_thr, " + "rc: %d\n", rc); + exit(2); + } } - pthread_join(write_thr, NULL); + pthread_join(io_thr, NULL); pthread_join(manipul_thr, NULL); - assert(args.entire_file_written == 1); + assert(args.entire_file_completed == 1); - (void) close(ofd); + (void) close(fd); free(buf); diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index 206ee8ac1542..bc767b9f624f 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1477,6 +1477,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/direct/dio_overwrites.ksh \ functional/direct/dio_property.ksh \ functional/direct/dio_random.ksh \ + functional/direct/dio_read_verify.ksh \ functional/direct/dio_recordsize.ksh \ functional/direct/dio_unaligned_block.ksh \ functional/direct/dio_unaligned_filesize.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh index e598dd57181e..d092a3b0e828 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh @@ -30,28 +30,39 @@ # STRATEGY: # 1. Run different zfs/zpool -j commands and check for valid JSON +# +# -j and --json mean the same thing. Each command will be run twice, replacing +# JSONFLAG with the flag under test. list=( - "zpool status -j -g --json-int --json-flat-vdevs --json-pool-key-guid" - "zpool status -p -j -g --json-int --json-flat-vdevs --json-pool-key-guid" - "zpool status -j -c upath" - "zpool status -j" - "zpool status -j testpool1" - "zpool list -j" - "zpool list -j -g" - "zpool list -j -o fragmentation" - "zpool get -j size" - "zpool get -j all" - "zpool version -j" - "zfs list -j" - "zfs list -j testpool1" - "zfs get -j all" - "zfs get -j available" - "zfs mount -j" - "zfs version -j" + "zpool status JSONFLAG -g --json-int --json-flat-vdevs --json-pool-key-guid" + "zpool status -p JSONFLAG -g --json-int --json-flat-vdevs --json-pool-key-guid" + "zpool status JSONFLAG -c upath" + "zpool status JSONFLAG" + "zpool status JSONFLAG testpool1" + "zpool list JSONFLAG" + "zpool list JSONFLAG -g" + "zpool list JSONFLAG -o fragmentation" + "zpool get JSONFLAG size" + "zpool get JSONFLAG all" + "zpool version JSONFLAG" + "zfs list JSONFLAG" + "zfs list JSONFLAG testpool1" + "zfs get JSONFLAG all" + "zfs get JSONFLAG available" + "zfs mount JSONFLAG" + "zfs version JSONFLAG" ) -for cmd in "${list[@]}" ; do - log_must eval "$cmd | jq > /dev/null" -done +function run_json_tests +{ + typeset flag=$1 + for cmd in "${list[@]}" ; do + cmd=${cmd//JSONFLAG/$flag} + log_must eval "$cmd | jq > /dev/null" + done +} + +log_must run_json_tests -j +log_must run_json_tests --json log_pass "zpool and zfs commands outputted valid JSON" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh index 71b2437a37ec..57a7412e37cc 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh @@ -113,7 +113,7 @@ wait parallel_time=$SECONDS log_note "asyncronously imported 4 pools in $parallel_time seconds" -log_must test $parallel_time -lt $(($sequential_time / 3)) +log_must test $parallel_time -lt $(($sequential_time / 2)) # # export pools with import delay injectors @@ -132,6 +132,6 @@ log_must zpool import -a -d $DEVICE_DIR -f parallel_time=$SECONDS log_note "asyncronously imported 4 pools in $parallel_time seconds" -log_must test $parallel_time -lt $(($sequential_time / 3)) +log_must test $parallel_time -lt $(($sequential_time / 2)) log_pass "Pool imports occur in parallel" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh index 326152b510a9..b1657648b5a1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh @@ -221,7 +221,7 @@ function ddt_dedup_vdev_limit # For here, we just set the entry count a little higher than what we # expect to allow for some instability. # - log_must test $(ddt_entries) -le 600000 + log_must test $(ddt_entries) -le 650000 do_clean } diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib index 3a70cf293967..5b3f893e1ce1 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib @@ -84,8 +84,9 @@ function get_zpool_status_chksum_verify_failures # pool_name vdev_type function get_zed_dio_verify_events # pool { typeset pool=$1 + typeset op=$2 - val=$(zpool events $pool | grep -c dio_verify) + val=$(zpool events $pool | grep -c "dio_verify_${op}") echo "$val" } @@ -96,11 +97,12 @@ function get_zed_dio_verify_events # pool # zpool events # After getting that counts will clear the out the ZPool errors and events # -function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors +function check_dio_chksum_verify_failures # pool vdev_type op expect_errors { typeset pool=$1 typeset vdev_type=$2 typeset expect_errors=$3 + typeset op=$4 typeset note_str="expecting none" if [[ $expect_errors -ne 0 ]]; then @@ -108,10 +110,10 @@ function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors fi log_note "Checking for Direct I/O write checksum verify errors \ - $note_str on ZPool: $pool" + $note_str on ZPool: $pool with $vdev_type" status_failures=$(get_zpool_status_chksum_verify_failures $pool $vdev_type) - zed_dio_verify_events=$(get_zed_dio_verify_events $pool) + zed_dio_verify_events=$(get_zed_dio_verify_events $pool $op) if [[ $expect_errors -ne 0 ]]; then if [[ $status_failures -eq 0 || diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh new file mode 100755 index 000000000000..456d429b1d99 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh @@ -0,0 +1,107 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify checksum verify works for Direct I/O reads. +# +# STRATEGY: +# 1. Create a zpool from each vdev type. +# 2. Start a Direct I/O read workload while manipulating the user buffer +# contents. +# 3. Verify there are Direct I/O read verify failures using +# zpool status -d and checking for zevents. We also make sure there +# are reported no data errors. +# + +verify_runnable "global" + +log_assert "Verify checksum verify works for Direct I/O reads." + +log_onexit dio_cleanup + +NUMBLOCKS=300 +BS=$((128 * 1024)) # 128k + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +# We will verify that there are no checksum errors for every Direct I/O read +# while manipulating the buffer contents while the I/O is still in flight and +# also that Direct I/O checksum verify failures and dio_verify_rd zevents are +# reported. + + +for type in "" "mirror" "raidz" "draid"; do + typeset vdev_type=$type + if [[ "${vdev_type}" == "" ]]; then + vdev_type="stripe" + fi + + log_note "Verifying every Direct I/O read verify with VDEV type \ + ${vdev_type}" + + create_pool $TESTPOOL1 $type $DIO_VDEVS + log_must eval "zfs create -o recordsize=128k -o compression=off \ + $TESTPOOL1/$TESTFS1" + + mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + prev_dio_rd=$(get_iostats_stat $TESTPOOL1 direct_read_count) + prev_arc_rd=$(get_iostats_stat $TESTPOOL1 arc_read_count) + + # Create the file before trying to manipulate the contents + log_must stride_dd -o "$mntpnt/direct-write.iso" -i /dev/urandom \ + -b $BS -c $NUMBLOCKS -D + # Manipulate the buffer contents will reading the file with Direct I/O + log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS -r + + # Getting new Direct I/O and ARC Write counts. + curr_dio_rd=$(get_iostats_stat $TESTPOOL1 direct_read_count) + curr_arc_rd=$(get_iostats_stat $TESTPOOL1 arc_read_count) + total_dio_rd=$((curr_dio_rd - prev_dio_rd)) + total_arc_rd=$((curr_arc_rd - prev_arc_rd)) + + log_note "Making sure there are no checksum errors with the ZPool" + log_must check_pool_status $TESTPOOL "errors" "No known data errors" + + log_note "Making sure we have Direct I/O and ARC reads logged" + if [[ $total_dio_rd -lt 1 ]]; then + log_fail "No Direct I/O reads $total_dio_rd" + fi + if [[ $total_arc_rd -lt 1 ]]; then + log_fail "No ARC reads $total_arc_rd" + fi + + log_note "Making sure we have Direct I/O write checksum verifies with ZPool" + check_dio_chksum_verify_failures "$TESTPOOL1" "$vdev_type" 1 "rd" + destroy_pool $TESTPOOL1 +done + +log_pass "Verified checksum verify works for Direct I/O reads." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh index efc9ee639184..ccdabc678a68 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh @@ -46,7 +46,7 @@ verify_runnable "global" function cleanup { log_must rm -f "$mntpnt/direct-write.iso" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 + check_dio_chksum_verify_failures $TESTPOOL "raidz" 0 "wr" } log_assert "Verify stable pages work for Direct I/O writes." @@ -76,8 +76,8 @@ do # Manipulate the user's buffer while running O_DIRECT write # workload with the buffer. - log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ - -n $NUMBLOCKS -b $BS + log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS -w # Reading back the contents of the file log_must stride_dd -i $mntpnt/direct-write.iso -o /dev/null \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh index 536459a35e6c..4eb9efe95ef1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh @@ -91,8 +91,8 @@ log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 0 log_note "Verifying no panics for Direct I/O writes with compression" log_must zfs set compression=on $TESTPOOL/$TESTFS prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) -log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" -n $NUMBLOCKS \ - -b $BS +log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" -n $NUMBLOCKS \ + -b $BS -w curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) total_dio_wr=$((curr_dio_wr - prev_dio_wr)) @@ -116,8 +116,8 @@ for i in $(seq 1 $ITERATIONS); do $i of $ITERATIONS with zfs_vdev_direct_write_verify=0" prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) - log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ - -n $NUMBLOCKS -b $BS + log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS -w # Reading file back to verify checksum errors filesize=$(get_file_size "$mntpnt/direct-write.iso") @@ -144,7 +144,7 @@ for i in $(seq 1 $ITERATIONS); do fi log_note "Making sure we have no Direct I/O write checksum verifies \ with ZPool" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 + check_dio_chksum_verify_failures $TESTPOOL "raidz" 0 "wr" log_must rm -f "$mntpnt/direct-write.iso" done @@ -166,8 +166,8 @@ for i in $(seq 1 $ITERATIONS); do $ITERATIONS with zfs_vdev_direct_write_verify=1" prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) - log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ - -n $NUMBLOCKS -b $BS -e + log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS -e -w # Reading file back to verify there no are checksum errors filesize=$(get_file_size "$mntpnt/direct-write.iso") @@ -175,7 +175,7 @@ for i in $(seq 1 $ITERATIONS); do log_must stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \ -c $num_blocks - # Getting new Direct I/O and ARC Write counts. + # Getting new Direct I/O write counts. curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) total_dio_wr=$((curr_dio_wr - prev_dio_wr)) @@ -188,7 +188,7 @@ for i in $(seq 1 $ITERATIONS); do fi log_note "Making sure we have Direct I/O write checksum verifies with ZPool" - check_dio_write_chksum_verify_failures "$TESTPOOL" "raidz" 1 + check_dio_chksum_verify_failures "$TESTPOOL" "raidz" 1 "wr" done log_must rm -f "$mntpnt/direct-write.iso" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh index 629870adf9ea..101fa410bdb6 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh @@ -48,6 +48,8 @@ function cleanup log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME log_must set_tunable32 SCAN_SUSPEND_PROGRESS \ $ORIG_SCAN_SUSPEND_PROGRESS + log_must set_tunable32 RESILVER_DEFER_PERCENT \ + $ORIG_RESILVER_DEFER_PERCENT log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX log_must zinject -c all destroy_pool $TESTPOOL1 @@ -90,6 +92,7 @@ log_assert "Check for unnecessary resilver restarts" ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS) ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS) +ORIG_RESILVER_DEFER_PERCENT=$(get_tunable RESILVER_DEFER_PERCENT) ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX) set -A RESTARTS -- '1' '2' '2' '2' diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh index 1fc2d2780b1c..af780b628ce4 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh @@ -117,5 +117,18 @@ log_must zfs set snapdev=visible $TESTPOOL verify_inherited 'snapdev' 'hidden' $SUBZVOL $VOLFS blockdev_missing $SUBSNAPDEV blockdev_exists $SNAPDEV +log_must zfs destroy $SNAP + +# 4. Verify "rename" is correctly reflected when "snapdev=visible" +# 4.1 First create a snapshot and verify the device is present +log_must zfs snapshot $SNAP +log_must zfs set snapdev=visible $ZVOL +blockdev_exists $SNAPDEV +# 4.2 rename the snapshot and verify the devices are updated +log_must zfs rename $SNAP $SNAP-new +blockdev_missing $SNAPDEV +blockdev_exists $SNAPDEV-new +# 4.3 cleanup +log_must zfs destroy $SNAP-new log_pass "ZFS volume property 'snapdev' works as expected" diff --git a/sys/contrib/openzfs/udev/vdev_id b/sys/contrib/openzfs/udev/vdev_id index 7b5aab141997..5897fea3ed66 100755 --- a/sys/contrib/openzfs/udev/vdev_id +++ b/sys/contrib/openzfs/udev/vdev_id @@ -124,6 +124,7 @@ TOPOLOGY= BAY= ENCL_ID="" UNIQ_ENCL_ID="" +ZPAD=1 usage() { cat << EOF @@ -154,7 +155,7 @@ map_slot() { if [ -z "$MAPPED_SLOT" ] ; then MAPPED_SLOT=$LINUX_SLOT fi - printf "%d" "${MAPPED_SLOT}" + printf "%0${ZPAD}d" "${MAPPED_SLOT}" } map_channel() { @@ -430,6 +431,15 @@ sas_handler() { d=$(eval echo '$'{$i}) SLOT=$(echo "$d" | sed -e 's/^.*://') ;; + "bay_lun") + # Like 'bay' but with the LUN number appened. Added for SAS + # multi-actuator HDDs, where one physical drive has multiple + # LUNs, thus multiple logical drives share the same bay number + i=$((i + 2)) + d=$(eval echo '$'{$i}) + LUN="-lun$(echo "$d" | sed -e 's/^.*://')" + SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null) + ;; "ses") # look for this SAS path in all SCSI Enclosure Services # (SES) enclosures @@ -460,7 +470,7 @@ sas_handler() { if [ -z "$CHAN" ] ; then return fi - echo "${CHAN}"-"${JBOD}"-"${SLOT}${PART}" + echo "${CHAN}"-"${JBOD}"-"${SLOT}${LUN}${PART}" else CHAN=$(map_channel "$PCI_ID" "$PORT") SLOT=$(map_slot "$SLOT" "$CHAN") @@ -468,7 +478,7 @@ sas_handler() { if [ -z "$CHAN" ] ; then return fi - echo "${CHAN}${SLOT}${PART}" + echo "${CHAN}${SLOT}${LUN}${PART}" fi } @@ -748,6 +758,8 @@ if [ -z "$BAY" ] ; then BAY=$(awk '($1 == "slot") {print $2; exit}' "$CONFIG") fi +ZPAD=$(awk '($1 == "zpad_slot") {print $2; exit}' "$CONFIG") + TOPOLOGY=${TOPOLOGY:-sas_direct} # Should we create /dev/by-enclosure symlinks? diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 5dc43281242f..91bb885be72f 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -687,15 +687,6 @@ /* All required iov_iter interfaces are available */ /* #undef HAVE_VFS_IOV_ITER */ -/* fops->iterate() is available */ -/* #undef HAVE_VFS_ITERATE */ - -/* fops->iterate_shared() is available */ -/* #undef HAVE_VFS_ITERATE_SHARED */ - -/* fops->readdir() is available */ -/* #undef HAVE_VFS_READDIR */ - /* address_space_operations->readpages exists */ /* #undef HAVE_VFS_READPAGES */ @@ -801,7 +792,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.3.0-rc1-FreeBSD_g3a9fca901" +#define ZFS_META_ALIAS "zfs-2.3.99-31-FreeBSD_gb2f6de7b5" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -831,10 +822,10 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "rc1-FreeBSD_g3a9fca901" +#define ZFS_META_RELEASE "31-FreeBSD_gb2f6de7b5" /* Define the project version. */ -#define ZFS_META_VERSION "2.3.0" +#define ZFS_META_VERSION "2.3.99" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 3a1480d8dc66..e090ccb924f2 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.3.0-rc1-0-g3a9fca901" +#define ZFS_META_GITREV "zfs-2.3.99-31-gb2f6de7b5" |