diff options
author | Martin Matuska <mm@FreeBSD.org> | 2023-04-23 10:12:49 +0000 |
---|---|---|
committer | Martin Matuska <mm@FreeBSD.org> | 2023-04-23 10:12:49 +0000 |
commit | 7005cd44040529b55573cff6212fde9e3d845215 (patch) | |
tree | f192e8551db7c20aeb0a9a94d875a30ed04bb829 | |
parent | 4b0899ae6686fad5e1ef6dec6e812c94ffadcca1 (diff) | |
parent | e25f9131d679692704c11dc0c1df6d4585b70c35 (diff) |
zfs: merge openzfs/zfs@e25f9131d (zfs-2.1-release) into stable/13
OpenZFS release 2.1.11
Notable upstream pull request merges:
#13368 ZFS_IOC_COUNT_FILLED does unnecessary txg_wait_synced() (reverted)
#13758 Allow mounting snapshots in .zfs/snapshot as a regular user
#13816 Fix a race condition in dsl_dataset_sync() when activating features
#14039 Optimize microzaps
#14196 Remove atomics from zh_refcount
#14198 Switch dnode stats to wmsums
#14199 Remove few pointer dereferences in dbuf_read()
#14200 Micro-optimize zrl_remove()
#14218 Avoid a null pointer dereference in zfs_mount() on FreeBSD
#14293 Turn default_bs and default_ibs into ZFS_MODULE_PARAMs
#14317 Pack zrlock_t by 8 bytes
#14402 Prefetch on deadlists merge
#14418 Introduce minimal ZIL block commit delay
#14448 Fix console progress reporting for recursive send
#14454 Improve arc_read() error reporting
#14502 Partially revert f806306ce0
"Activate filesystem features only in syncing context"
#14516 System-wide speculative prefetch limit
#14523 Move dmu_buf_rele() after dsl_dataset_sync_done()
#14563 Optimize the is_l2cacheable functions
#14573 Add missing increment to dsl_deadlist_move_bpobj()
#14621 Colorize patchset
#14639 FreeBSD: Remove extra arc_reduce_target_size() call
#14641 Additional limits on hole reporting
#14653 Update vdev state for spare vdev
#14712 Fix "Add colored output to zfs list"
#14761 Revert "ZFS_IOC_COUNT_FILLED does unnecessary txg_wait_synced()"
Obtained from: OpenZFS
OpenZFS tag: zfs-2.1.11
OpenZFS commit: e25f9131d679692704c11dc0c1df6d4585b70c35
Relnotes: yes
116 files changed, 2194 insertions, 1972 deletions
diff --git a/sys/contrib/openzfs/.github/workflows/build-dependencies.txt b/sys/contrib/openzfs/.github/workflows/build-dependencies.txt new file mode 100644 index 000000000000..73921865c42a --- /dev/null +++ b/sys/contrib/openzfs/.github/workflows/build-dependencies.txt @@ -0,0 +1,57 @@ +acl +alien +attr +autoconf +bc +build-essential +curl +dbench +debhelper-compat +dh-python +dkms +fakeroot +fio +gdb +gdebi +git +ksh +lcov +libacl1-dev +libaio-dev +libattr1-dev +libblkid-dev +libcurl4-openssl-dev +libdevmapper-dev +libelf-dev +libffi-dev +libmount-dev +libpam0g-dev +libselinux1-dev +libssl-dev +libtool +libudev-dev +linux-headers-generic +lsscsi +mdadm +nfs-kernel-server +pamtester +parted +po-debconf +python3 +python3-all-dev +python3-cffi +python3-dev +python3-packaging +python3-pip +python3-setuptools +python3-sphinx +rng-tools-debian +rsync +samba +sysstat +uuid-dev +watchdog +wget +xfslibs-dev +xz-utils +zlib1g-dev diff --git a/sys/contrib/openzfs/.github/workflows/checkstyle-dependencies.txt b/sys/contrib/openzfs/.github/workflows/checkstyle-dependencies.txt new file mode 100644 index 000000000000..cc68905d8d36 --- /dev/null +++ b/sys/contrib/openzfs/.github/workflows/checkstyle-dependencies.txt @@ -0,0 +1,2 @@ +pax-utils +shellcheck diff --git a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml index 7b506262593f..1a31f061eba4 100644 --- a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml +++ b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml @@ -6,20 +6,27 @@ on: jobs: checkstyle: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies run: | - sudo apt-get update - sudo apt-get install --yes -qq build-essential autoconf libtool gawk alien fakeroot linux-headers-$(uname -r) - sudo apt-get install --yes -qq zlib1g-dev uuid-dev libattr1-dev libblkid-dev libselinux-dev libudev-dev 
libssl-dev python-dev python-setuptools python-cffi python3 python3-dev python3-setuptools python3-cffi - # packages for tests - sudo apt-get install --yes -qq parted lsscsi ksh attr acl nfs-kernel-server fio - sudo apt-get install --yes -qq mandoc cppcheck pax-utils devscripts - sudo -E pip --quiet install flake8 + # https://github.com/orgs/community/discussions/47863 + sudo apt-mark hold grub-efi-amd64-signed + sudo apt-get update --fix-missing + sudo apt-get upgrade + sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/build-dependencies.txt apt-get install -qq + sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/checkstyle-dependencies.txt apt-get install -qq + sudo python3 -m pip install --quiet flake8 + sudo apt-get clean + + # confirm that the tools are installed + # the build system doesn't fail when they are not + flake8 --version + scanelf --version + shellcheck --version - name: Prepare run: | sh ./autogen.sh diff --git a/sys/contrib/openzfs/.github/workflows/scripts/reclaim_disk_space.sh b/sys/contrib/openzfs/.github/workflows/scripts/reclaim_disk_space.sh new file mode 100755 index 000000000000..ed23ce31d85c --- /dev/null +++ b/sys/contrib/openzfs/.github/workflows/scripts/reclaim_disk_space.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -eu + +# remove 4GiB of images +sudo systemd-run docker system prune --force --all --volumes + +# remove unused software +sudo systemd-run --wait rm -rf \ + "$AGENT_TOOLSDIRECTORY" \ + /opt/* \ + /usr/local/* \ + /usr/share/az* \ + /usr/share/dotnet \ + /usr/share/gradle* \ + /usr/share/miniconda \ + /usr/share/swift \ + /var/lib/gems \ + /var/lib/mysql \ + /var/lib/snapd + +# trim the cleaned space +sudo fstrim / diff --git a/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml b/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml index c2a8dec99658..1d30ddc645a4 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml +++ 
b/sys/contrib/openzfs/.github/workflows/zfs-tests-functional.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - os: [20.04] + os: [20.04, 22.04] runs-on: ubuntu-${{ matrix.os }} steps: - uses: actions/checkout@v3 @@ -17,17 +17,12 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies run: | - sudo apt-get update - sudo apt-get install --yes -qq build-essential autoconf libtool gdb lcov \ - git alien fakeroot wget curl bc fio acl \ - sysstat mdadm lsscsi parted gdebi attr dbench watchdog ksh \ - nfs-kernel-server samba rng-tools xz-utils \ - zlib1g-dev uuid-dev libblkid-dev libselinux-dev \ - xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \ - libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \ - libpam0g-dev pamtester python-dev python-setuptools python-cffi \ - python3 python3-dev python3-setuptools python3-cffi python3-packaging \ - libcurl4-openssl-dev + # https://github.com/orgs/community/discussions/47863 + sudo apt-mark hold grub-efi-amd64-signed + sudo apt-get update --fix-missing + sudo apt-get upgrade + sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/build-dependencies.txt apt-get install -qq + sudo apt-get clean - name: Autogen.sh run: | sh autogen.sh @@ -56,12 +51,13 @@ jobs: sudo udevadm control --reload-rules fi fi - # Workaround to provide additional free space for testing. 
- # https://github.com/actions/virtual-environments/issues/2840 - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Clear the kernel ring buffer + run: | + sudo dmesg -c >/var/tmp/dmesg-prerun + - name: Reclaim and report disk space + run: | + ${{ github.workspace }}/.github/workflows/scripts/reclaim_disk_space.sh + df -h / - name: Tests run: | /usr/share/zfs/zfs-tests.sh -vR -s 3G @@ -71,7 +67,7 @@ jobs: run: | RESULTS_PATH=$(readlink -f /var/tmp/test_results/current) sudo dmesg > $RESULTS_PATH/dmesg - sudo cp /var/log/syslog $RESULTS_PATH/ + sudo cp /var/log/syslog /var/tmp/dmesg-prerun $RESULTS_PATH/ sudo chmod +r $RESULTS_PATH/* # Replace ':' in dir names, actions/upload-artifact doesn't support it for f in $(find /var/tmp/test_results -name '*:*'); do mv "$f" "${f//:/__}"; done @@ -79,5 +75,7 @@ jobs: if: failure() with: name: Test logs Ubuntu-${{ matrix.os }} - path: /var/tmp/test_results/20*/ + path: | + /var/tmp/test_results/* + !/var/tmp/test_results/current if-no-files-found: ignore diff --git a/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml b/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml index c56355623002..6a1432c2972f 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-tests-sanity.yml @@ -6,24 +6,19 @@ on: jobs: tests: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies run: | - sudo apt-get update - sudo apt-get install --yes -qq build-essential autoconf libtool gdb lcov \ - git alien fakeroot wget curl bc fio acl \ - sysstat mdadm lsscsi parted gdebi attr dbench watchdog ksh \ - nfs-kernel-server samba rng-tools xz-utils \ - zlib1g-dev uuid-dev libblkid-dev libselinux-dev \ - xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \ - libssl-dev 
libffi-dev libaio-dev libelf-dev libmount-dev \ - libpam0g-dev pamtester python-dev python-setuptools python-cffi \ - python3 python3-dev python3-setuptools python3-cffi python3-packaging \ - libcurl4-openssl-dev + # https://github.com/orgs/community/discussions/47863 + sudo apt-mark hold grub-efi-amd64-signed + sudo apt-get update --fix-missing + sudo apt-get upgrade + sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/build-dependencies.txt apt-get install -qq + sudo apt-get clean - name: Autogen.sh run: | sh autogen.sh @@ -52,12 +47,13 @@ jobs: sudo udevadm control --reload-rules fi fi - # Workaround to provide additional free space for testing. - # https://github.com/actions/virtual-environments/issues/2840 - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Clear the kernel ring buffer + run: | + sudo dmesg -c >/var/tmp/dmesg-prerun + - name: Reclaim and report disk space + run: | + ${{ github.workspace }}/.github/workflows/scripts/reclaim_disk_space.sh + df -h / - name: Tests run: | /usr/share/zfs/zfs-tests.sh -vR -s 3G -r sanity @@ -67,13 +63,15 @@ jobs: run: | RESULTS_PATH=$(readlink -f /var/tmp/test_results/current) sudo dmesg > $RESULTS_PATH/dmesg - sudo cp /var/log/syslog $RESULTS_PATH/ + sudo cp /var/log/syslog /var/tmp/dmesg-prerun $RESULTS_PATH/ sudo chmod +r $RESULTS_PATH/* # Replace ':' in dir names, actions/upload-artifact doesn't support it for f in $(find /var/tmp/test_results -name '*:*'); do mv "$f" "${f//:/__}"; done - uses: actions/upload-artifact@v3 if: failure() with: - name: Test logs - path: /var/tmp/test_results/20*/ + name: Test logs Ubuntu-${{ matrix.os }} + path: | + /var/tmp/test_results/* + !/var/tmp/test_results/current if-no-files-found: ignore diff --git a/sys/contrib/openzfs/.github/workflows/zloop.yml b/sys/contrib/openzfs/.github/workflows/zloop.yml index 1c42491ee912..440ec01faa9f 100644 --- 
a/sys/contrib/openzfs/.github/workflows/zloop.yml +++ b/sys/contrib/openzfs/.github/workflows/zloop.yml @@ -6,7 +6,7 @@ on: jobs: tests: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 env: TEST_DIR: /var/tmp/zloop steps: @@ -15,15 +15,12 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies run: | - sudo apt-get update - sudo apt-get install --yes -qq build-essential autoconf libtool gdb \ - git alien fakeroot \ - zlib1g-dev uuid-dev libblkid-dev libselinux-dev \ - xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \ - libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \ - libpam0g-dev \ - python-dev python-setuptools python-cffi python-packaging \ - python3 python3-dev python3-setuptools python3-cffi python3-packaging + # https://github.com/orgs/community/discussions/47863 + sudo apt-mark hold grub-efi-amd64-signed + sudo apt-get update --fix-missing + sudo apt-get upgrade + sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/build-dependencies.txt apt-get install -qq + sudo apt-get clean - name: Autogen.sh run: | sh autogen.sh @@ -44,8 +41,9 @@ jobs: - name: Tests run: | sudo mkdir -p $TEST_DIR - # run for 20 minutes to have a total runner time of 30 minutes - sudo /usr/share/zfs/zloop.sh -t 1200 -l -m1 -- -T 120 -P 60 + # run for 10 minutes or at most 2 iterations for a maximum runner + # time of 20 minutes. 
+ sudo /usr/share/zfs/zloop.sh -t 600 -I 2 -l -m1 -- -T 120 -P 60 - name: Prepare artifacts if: failure() run: | diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index e9a809aef3b8..c2db34689128 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.1.9 +Version: 2.1.11 Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.1 +Linux-Maximum: 6.2 Linux-Minimum: 3.10 diff --git a/sys/contrib/openzfs/cmd/arc_summary/Makefile.am b/sys/contrib/openzfs/cmd/arc_summary/Makefile.am index 1a26c2c199f8..f419f07e0eda 100644 --- a/sys/contrib/openzfs/cmd/arc_summary/Makefile.am +++ b/sys/contrib/openzfs/cmd/arc_summary/Makefile.am @@ -1,13 +1,8 @@ bin_SCRIPTS = arc_summary CLEANFILES = arc_summary -EXTRA_DIST = arc_summary2 arc_summary3 - -if USING_PYTHON_2 -SCRIPT = arc_summary2 -else +EXTRA_DIST = arc_summary3 SCRIPT = arc_summary3 -endif arc_summary: $(SCRIPT) cp $< $@ diff --git a/sys/contrib/openzfs/cmd/arc_summary/arc_summary2 b/sys/contrib/openzfs/cmd/arc_summary/arc_summary2 deleted file mode 100755 index 3302a802d146..000000000000 --- a/sys/contrib/openzfs/cmd/arc_summary/arc_summary2 +++ /dev/null @@ -1,1180 +0,0 @@ -#!/usr/bin/env python2 -# -# $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $ -# -# Copyright (c) 2008 Ben Rockwood <benr@cuddletech.com>, -# Copyright (c) 2010 Martin Matuska <mm@FreeBSD.org>, -# Copyright (c) 2010-2011 Jason J. Hellenthal <jhell@DataIX.net>, -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# If you are having troubles when using this script from cron(8) please try -# adjusting your PATH before reporting problems. -# -# Note some of this code uses older code (eg getopt instead of argparse, -# subprocess.Popen() instead of subprocess.run()) because we need to support -# some very old versions of Python. -# - -"""Print statistics on the ZFS Adjustable Replacement Cache (ARC) - -Provides basic information on the ARC, its efficiency, the L2ARC (if present), -the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See the -in-source documentation and code at -https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details. 
-""" - -import getopt -import os -import sys -import time -import errno - -from subprocess import Popen, PIPE -from decimal import Decimal as D - - -if sys.platform.startswith('freebsd'): - # Requires py27-sysctl on FreeBSD - import sysctl - - def is_value(ctl): - return ctl.type != sysctl.CTLTYPE_NODE - - def load_kstats(namespace): - """Collect information on a specific subsystem of the ARC""" - - base = 'kstat.zfs.misc.%s.' % namespace - fmt = lambda kstat: (kstat.name, D(kstat.value)) - kstats = sysctl.filter(base) - return [fmt(kstat) for kstat in kstats if is_value(kstat)] - - def load_tunables(): - ctls = sysctl.filter('vfs.zfs') - return dict((ctl.name, ctl.value) for ctl in ctls if is_value(ctl)) - -elif sys.platform.startswith('linux'): - - def load_kstats(namespace): - """Collect information on a specific subsystem of the ARC""" - - kstat = 'kstat.zfs.misc.%s.%%s' % namespace - path = '/proc/spl/kstat/zfs/%s' % namespace - with open(path) as f: - entries = [line.strip().split() for line in f][2:] # Skip header - return [(kstat % name, D(value)) for name, _, value in entries] - - def load_tunables(): - basepath = '/sys/module/zfs/parameters' - tunables = {} - for name in os.listdir(basepath): - if not name: - continue - path = '%s/%s' % (basepath, name) - with open(path) as f: - value = f.read() - tunables[name] = value.strip() - return tunables - - -show_tunable_descriptions = False -alternate_tunable_layout = False - - -def get_Kstat(): - """Collect information on the ZFS subsystem from the /proc virtual - file system. The name "kstat" is a holdover from the Solaris utility - of the same name. - """ - - Kstat = {} - Kstat.update(load_kstats('arcstats')) - Kstat.update(load_kstats('zfetchstats')) - Kstat.update(load_kstats('vdev_cache_stats')) - return Kstat - - -def fBytes(b=0): - """Return human-readable representation of a byte value in - powers of 2 (eg "KiB" for "kibibytes", etc) to two decimal - points. 
Values smaller than one KiB are returned without - decimal points. - """ - - prefixes = [ - [2**80, "YiB"], # yobibytes (yotta) - [2**70, "ZiB"], # zebibytes (zetta) - [2**60, "EiB"], # exbibytes (exa) - [2**50, "PiB"], # pebibytes (peta) - [2**40, "TiB"], # tebibytes (tera) - [2**30, "GiB"], # gibibytes (giga) - [2**20, "MiB"], # mebibytes (mega) - [2**10, "KiB"]] # kibibytes (kilo) - - if b >= 2**10: - - for limit, unit in prefixes: - - if b >= limit: - value = b / limit - break - - result = "%0.2f\t%s" % (value, unit) - - else: - - result = "%d\tBytes" % b - - return result - - -def fHits(hits=0): - """Create a human-readable representation of the number of hits. - The single-letter symbols used are SI to avoid the confusion caused - by the different "short scale" and "long scale" representations in - English, which use the same words for different values. See - https://en.wikipedia.org/wiki/Names_of_large_numbers and - https://physics.nist.gov/cuu/Units/prefixes.html - """ - - numbers = [ - [10**24, 'Y'], # yotta (septillion) - [10**21, 'Z'], # zetta (sextillion) - [10**18, 'E'], # exa (quintrillion) - [10**15, 'P'], # peta (quadrillion) - [10**12, 'T'], # tera (trillion) - [10**9, 'G'], # giga (billion) - [10**6, 'M'], # mega (million) - [10**3, 'k']] # kilo (thousand) - - if hits >= 1000: - - for limit, symbol in numbers: - - if hits >= limit: - value = hits/limit - break - - result = "%0.2f%s" % (value, symbol) - - else: - - result = "%d" % hits - - return result - - -def fPerc(lVal=0, rVal=0, Decimal=2): - """Calculate percentage value and return in human-readable format""" - - if rVal > 0: - return str("%0." + str(Decimal) + "f") % (100 * (lVal / rVal)) + "%" - else: - return str("%0." 
+ str(Decimal) + "f") % 100 + "%" - - -def get_arc_summary(Kstat): - """Collect general data on the ARC""" - - output = {} - memory_throttle_count = Kstat[ - "kstat.zfs.misc.arcstats.memory_throttle_count" - ] - - if memory_throttle_count > 0: - output['health'] = 'THROTTLED' - else: - output['health'] = 'HEALTHY' - - output['memory_throttle_count'] = fHits(memory_throttle_count) - - # ARC Misc. - deleted = Kstat["kstat.zfs.misc.arcstats.deleted"] - mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"] - evict_skip = Kstat["kstat.zfs.misc.arcstats.evict_skip"] - evict_l2_cached = Kstat["kstat.zfs.misc.arcstats.evict_l2_cached"] - evict_l2_eligible = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible"] - evict_l2_eligible_mfu = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible_mfu"] - evict_l2_eligible_mru = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible_mru"] - evict_l2_ineligible = Kstat["kstat.zfs.misc.arcstats.evict_l2_ineligible"] - evict_l2_skip = Kstat["kstat.zfs.misc.arcstats.evict_l2_skip"] - - # ARC Misc. 
- output["arc_misc"] = {} - output["arc_misc"]["deleted"] = fHits(deleted) - output["arc_misc"]["mutex_miss"] = fHits(mutex_miss) - output["arc_misc"]["evict_skips"] = fHits(evict_skip) - output["arc_misc"]["evict_l2_skip"] = fHits(evict_l2_skip) - output["arc_misc"]["evict_l2_cached"] = fBytes(evict_l2_cached) - output["arc_misc"]["evict_l2_eligible"] = fBytes(evict_l2_eligible) - output["arc_misc"]["evict_l2_eligible_mfu"] = { - 'per': fPerc(evict_l2_eligible_mfu, evict_l2_eligible), - 'num': fBytes(evict_l2_eligible_mfu), - } - output["arc_misc"]["evict_l2_eligible_mru"] = { - 'per': fPerc(evict_l2_eligible_mru, evict_l2_eligible), - 'num': fBytes(evict_l2_eligible_mru), - } - output["arc_misc"]["evict_l2_ineligible"] = fBytes(evict_l2_ineligible) - - # ARC Sizing - arc_size = Kstat["kstat.zfs.misc.arcstats.size"] - mru_size = Kstat["kstat.zfs.misc.arcstats.mru_size"] - mfu_size = Kstat["kstat.zfs.misc.arcstats.mfu_size"] - meta_limit = Kstat["kstat.zfs.misc.arcstats.arc_meta_limit"] - meta_size = Kstat["kstat.zfs.misc.arcstats.arc_meta_used"] - dnode_limit = Kstat["kstat.zfs.misc.arcstats.arc_dnode_limit"] - dnode_size = Kstat["kstat.zfs.misc.arcstats.dnode_size"] - target_max_size = Kstat["kstat.zfs.misc.arcstats.c_max"] - target_min_size = Kstat["kstat.zfs.misc.arcstats.c_min"] - target_size = Kstat["kstat.zfs.misc.arcstats.c"] - - target_size_ratio = (target_max_size / target_min_size) - - # ARC Sizing - output['arc_sizing'] = {} - output['arc_sizing']['arc_size'] = { - 'per': fPerc(arc_size, target_max_size), - 'num': fBytes(arc_size), - } - output['arc_sizing']['target_max_size'] = { - 'ratio': target_size_ratio, - 'num': fBytes(target_max_size), - } - output['arc_sizing']['target_min_size'] = { - 'per': fPerc(target_min_size, target_max_size), - 'num': fBytes(target_min_size), - } - output['arc_sizing']['target_size'] = { - 'per': fPerc(target_size, target_max_size), - 'num': fBytes(target_size), - } - output['arc_sizing']['meta_limit'] = { - 'per': 
fPerc(meta_limit, target_max_size), - 'num': fBytes(meta_limit), - } - output['arc_sizing']['meta_size'] = { - 'per': fPerc(meta_size, meta_limit), - 'num': fBytes(meta_size), - } - output['arc_sizing']['dnode_limit'] = { - 'per': fPerc(dnode_limit, meta_limit), - 'num': fBytes(dnode_limit), - } - output['arc_sizing']['dnode_size'] = { - 'per': fPerc(dnode_size, dnode_limit), - 'num': fBytes(dnode_size), - } - - # ARC Hash Breakdown - output['arc_hash_break'] = {} - output['arc_hash_break']['hash_chain_max'] = Kstat[ - "kstat.zfs.misc.arcstats.hash_chain_max" - ] - output['arc_hash_break']['hash_chains'] = Kstat[ - "kstat.zfs.misc.arcstats.hash_chains" - ] - output['arc_hash_break']['hash_collisions'] = Kstat[ - "kstat.zfs.misc.arcstats.hash_collisions" - ] - output['arc_hash_break']['hash_elements'] = Kstat[ - "kstat.zfs.misc.arcstats.hash_elements" - ] - output['arc_hash_break']['hash_elements_max'] = Kstat[ - "kstat.zfs.misc.arcstats.hash_elements_max" - ] - - output['arc_size_break'] = {} - output['arc_size_break']['recently_used_cache_size'] = { - 'per': fPerc(mru_size, mru_size + mfu_size), - 'num': fBytes(mru_size), - } - output['arc_size_break']['frequently_used_cache_size'] = { - 'per': fPerc(mfu_size, mru_size + mfu_size), - 'num': fBytes(mfu_size), - } - - # ARC Hash Breakdown - hash_chain_max = Kstat["kstat.zfs.misc.arcstats.hash_chain_max"] - hash_chains = Kstat["kstat.zfs.misc.arcstats.hash_chains"] - hash_collisions = Kstat["kstat.zfs.misc.arcstats.hash_collisions"] - hash_elements = Kstat["kstat.zfs.misc.arcstats.hash_elements"] - hash_elements_max = Kstat["kstat.zfs.misc.arcstats.hash_elements_max"] - - output['arc_hash_break'] = {} - output['arc_hash_break']['elements_max'] = fHits(hash_elements_max) - output['arc_hash_break']['elements_current'] = { - 'per': fPerc(hash_elements, hash_elements_max), - 'num': fHits(hash_elements), - } - output['arc_hash_break']['collisions'] = fHits(hash_collisions) - output['arc_hash_break']['chain_max'] = 
fHits(hash_chain_max) - output['arc_hash_break']['chains'] = fHits(hash_chains) - - return output - - -def _arc_summary(Kstat): - """Print information on the ARC""" - - # ARC Sizing - arc = get_arc_summary(Kstat) - - sys.stdout.write("ARC Summary: (%s)\n" % arc['health']) - - sys.stdout.write("\tMemory Throttle Count:\t\t\t%s\n" % - arc['memory_throttle_count']) - sys.stdout.write("\n") - - # ARC Misc. - sys.stdout.write("ARC Misc:\n") - sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted']) - sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" % - arc['arc_misc']['mutex_miss']) - sys.stdout.write("\tEviction Skips:\t\t\t\t%s\n" % - arc['arc_misc']['evict_skips']) - sys.stdout.write("\tEviction Skips Due to L2 Writes:\t%s\n" % - arc['arc_misc']['evict_l2_skip']) - sys.stdout.write("\tL2 Cached Evictions:\t\t\t%s\n" % - arc['arc_misc']['evict_l2_cached']) - sys.stdout.write("\tL2 Eligible Evictions:\t\t\t%s\n" % - arc['arc_misc']['evict_l2_eligible']) - sys.stdout.write("\tL2 Eligible MFU Evictions:\t%s\t%s\n" % ( - arc['arc_misc']['evict_l2_eligible_mfu']['per'], - arc['arc_misc']['evict_l2_eligible_mfu']['num'], - ) - ) - sys.stdout.write("\tL2 Eligible MRU Evictions:\t%s\t%s\n" % ( - arc['arc_misc']['evict_l2_eligible_mru']['per'], - arc['arc_misc']['evict_l2_eligible_mru']['num'], - ) - ) - sys.stdout.write("\tL2 Ineligible Evictions:\t\t%s\n" % - arc['arc_misc']['evict_l2_ineligible']) - sys.stdout.write("\n") - - # ARC Sizing - sys.stdout.write("ARC Size:\t\t\t\t%s\t%s\n" % ( - arc['arc_sizing']['arc_size']['per'], - arc['arc_sizing']['arc_size']['num'] - ) - ) - sys.stdout.write("\tTarget Size: (Adaptive)\t\t%s\t%s\n" % ( - arc['arc_sizing']['target_size']['per'], - arc['arc_sizing']['target_size']['num'], - ) - ) - - sys.stdout.write("\tMin Size (Hard Limit):\t\t%s\t%s\n" % ( - arc['arc_sizing']['target_min_size']['per'], - arc['arc_sizing']['target_min_size']['num'], - ) - ) - - sys.stdout.write("\tMax Size (High Water):\t\t%d:1\t%s\n" % ( - 
arc['arc_sizing']['target_max_size']['ratio'], - arc['arc_sizing']['target_max_size']['num'], - ) - ) - - sys.stdout.write("\nARC Size Breakdown:\n") - sys.stdout.write("\tRecently Used Cache Size:\t%s\t%s\n" % ( - arc['arc_size_break']['recently_used_cache_size']['per'], - arc['arc_size_break']['recently_used_cache_size']['num'], - ) - ) - sys.stdout.write("\tFrequently Used Cache Size:\t%s\t%s\n" % ( - arc['arc_size_break']['frequently_used_cache_size']['per'], - arc['arc_size_break']['frequently_used_cache_size']['num'], - ) - ) - sys.stdout.write("\tMetadata Size (Hard Limit):\t%s\t%s\n" % ( - arc['arc_sizing']['meta_limit']['per'], - arc['arc_sizing']['meta_limit']['num'], - ) - ) - sys.stdout.write("\tMetadata Size:\t\t\t%s\t%s\n" % ( - arc['arc_sizing']['meta_size']['per'], - arc['arc_sizing']['meta_size']['num'], - ) - ) - sys.stdout.write("\tDnode Size (Hard Limit):\t%s\t%s\n" % ( - arc['arc_sizing']['dnode_limit']['per'], - arc['arc_sizing']['dnode_limit']['num'], - ) - ) - sys.stdout.write("\tDnode Size:\t\t\t%s\t%s\n" % ( - arc['arc_sizing']['dnode_size']['per'], - arc['arc_sizing']['dnode_size']['num'], - ) - ) - - sys.stdout.write("\n") - - # ARC Hash Breakdown - sys.stdout.write("ARC Hash Breakdown:\n") - sys.stdout.write("\tElements Max:\t\t\t\t%s\n" % - arc['arc_hash_break']['elements_max']) - sys.stdout.write("\tElements Current:\t\t%s\t%s\n" % ( - arc['arc_hash_break']['elements_current']['per'], - arc['arc_hash_break']['elements_current']['num'], - ) - ) - sys.stdout.write("\tCollisions:\t\t\t\t%s\n" % - arc['arc_hash_break']['collisions']) - sys.stdout.write("\tChain Max:\t\t\t\t%s\n" % - arc['arc_hash_break']['chain_max']) - sys.stdout.write("\tChains:\t\t\t\t\t%s\n" % - arc['arc_hash_break']['chains']) - - -def get_arc_efficiency(Kstat): - """Collect information on the efficiency of the ARC""" - - output = {} - - arc_hits = Kstat["kstat.zfs.misc.arcstats.hits"] - arc_misses = Kstat["kstat.zfs.misc.arcstats.misses"] - demand_data_hits = 
Kstat["kstat.zfs.misc.arcstats.demand_data_hits"] - demand_data_misses = Kstat["kstat.zfs.misc.arcstats.demand_data_misses"] - demand_metadata_hits = Kstat[ - "kstat.zfs.misc.arcstats.demand_metadata_hits" - ] - demand_metadata_misses = Kstat[ - "kstat.zfs.misc.arcstats.demand_metadata_misses" - ] - mfu_ghost_hits = Kstat["kstat.zfs.misc.arcstats.mfu_ghost_hits"] - mfu_hits = Kstat["kstat.zfs.misc.arcstats.mfu_hits"] - mru_ghost_hits = Kstat["kstat.zfs.misc.arcstats.mru_ghost_hits"] - mru_hits = Kstat["kstat.zfs.misc.arcstats.mru_hits"] - prefetch_data_hits = Kstat["kstat.zfs.misc.arcstats.prefetch_data_hits"] - prefetch_data_misses = Kstat[ - "kstat.zfs.misc.arcstats.prefetch_data_misses" - ] - prefetch_metadata_hits = Kstat[ - "kstat.zfs.misc.arcstats.prefetch_metadata_hits" - ] - prefetch_metadata_misses = Kstat[ - "kstat.zfs.misc.arcstats.prefetch_metadata_misses" - ] - - anon_hits = arc_hits - ( - mfu_hits + mru_hits + mfu_ghost_hits + mru_ghost_hits - ) - arc_accesses_total = (arc_hits + arc_misses) - demand_data_total = (demand_data_hits + demand_data_misses) - prefetch_data_total = (prefetch_data_hits + prefetch_data_misses) - real_hits = (mfu_hits + mru_hits) - - output["total_accesses"] = fHits(arc_accesses_total) - output["cache_hit_ratio"] = { - 'per': fPerc(arc_hits, arc_accesses_total), - 'num': fHits(arc_hits), - } - output["cache_miss_ratio"] = { - 'per': fPerc(arc_misses, arc_accesses_total), - 'num': fHits(arc_misses), - } - output["actual_hit_ratio"] = { - 'per': fPerc(real_hits, arc_accesses_total), - 'num': fHits(real_hits), - } - output["data_demand_efficiency"] = { - 'per': fPerc(demand_data_hits, demand_data_total), - 'num': fHits(demand_data_total), - } - - if prefetch_data_total > 0: - output["data_prefetch_efficiency"] = { - 'per': fPerc(prefetch_data_hits, prefetch_data_total), - 'num': fHits(prefetch_data_total), - } - - if anon_hits > 0: - output["cache_hits_by_cache_list"] = {} - output["cache_hits_by_cache_list"]["anonymously_used"] 
= { - 'per': fPerc(anon_hits, arc_hits), - 'num': fHits(anon_hits), - } - - output["most_recently_used"] = { - 'per': fPerc(mru_hits, arc_hits), - 'num': fHits(mru_hits), - } - output["most_frequently_used"] = { - 'per': fPerc(mfu_hits, arc_hits), - 'num': fHits(mfu_hits), - } - output["most_recently_used_ghost"] = { - 'per': fPerc(mru_ghost_hits, arc_hits), - 'num': fHits(mru_ghost_hits), - } - output["most_frequently_used_ghost"] = { - 'per': fPerc(mfu_ghost_hits, arc_hits), - 'num': fHits(mfu_ghost_hits), - } - - output["cache_hits_by_data_type"] = {} - output["cache_hits_by_data_type"]["demand_data"] = { - 'per': fPerc(demand_data_hits, arc_hits), - 'num': fHits(demand_data_hits), - } - output["cache_hits_by_data_type"]["prefetch_data"] = { - 'per': fPerc(prefetch_data_hits, arc_hits), - 'num': fHits(prefetch_data_hits), - } - output["cache_hits_by_data_type"]["demand_metadata"] = { - 'per': fPerc(demand_metadata_hits, arc_hits), - 'num': fHits(demand_metadata_hits), - } - output["cache_hits_by_data_type"]["prefetch_metadata"] = { - 'per': fPerc(prefetch_metadata_hits, arc_hits), - 'num': fHits(prefetch_metadata_hits), - } - - output["cache_misses_by_data_type"] = {} - output["cache_misses_by_data_type"]["demand_data"] = { - 'per': fPerc(demand_data_misses, arc_misses), - 'num': fHits(demand_data_misses), - } - output["cache_misses_by_data_type"]["prefetch_data"] = { - 'per': fPerc(prefetch_data_misses, arc_misses), - 'num': fHits(prefetch_data_misses), - } - output["cache_misses_by_data_type"]["demand_metadata"] = { - 'per': fPerc(demand_metadata_misses, arc_misses), - 'num': fHits(demand_metadata_misses), - } - output["cache_misses_by_data_type"]["prefetch_metadata"] = { - 'per': fPerc(prefetch_metadata_misses, arc_misses), - 'num': fHits(prefetch_metadata_misses), - } - - return output - - -def _arc_efficiency(Kstat): - """Print information on the efficiency of the ARC""" - - arc = get_arc_efficiency(Kstat) - - sys.stdout.write("ARC Total 
accesses:\t\t\t\t\t%s\n" % - arc['total_accesses']) - sys.stdout.write("\tCache Hit Ratio:\t\t%s\t%s\n" % ( - arc['cache_hit_ratio']['per'], - arc['cache_hit_ratio']['num'], - ) - ) - sys.stdout.write("\tCache Miss Ratio:\t\t%s\t%s\n" % ( - arc['cache_miss_ratio']['per'], - arc['cache_miss_ratio']['num'], - ) - ) - - sys.stdout.write("\tActual Hit Ratio:\t\t%s\t%s\n" % ( - arc['actual_hit_ratio']['per'], - arc['actual_hit_ratio']['num'], - ) - ) - - sys.stdout.write("\n") - sys.stdout.write("\tData Demand Efficiency:\t\t%s\t%s\n" % ( - arc['data_demand_efficiency']['per'], - arc['data_demand_efficiency']['num'], - ) - ) - - if 'data_prefetch_efficiency' in arc: - sys.stdout.write("\tData Prefetch Efficiency:\t%s\t%s\n" % ( - arc['data_prefetch_efficiency']['per'], - arc['data_prefetch_efficiency']['num'], - ) - ) - sys.stdout.write("\n") - - sys.stdout.write("\tCACHE HITS BY CACHE LIST:\n") - if 'cache_hits_by_cache_list' in arc: - sys.stdout.write("\t Anonymously Used:\t\t%s\t%s\n" % ( - arc['cache_hits_by_cache_list']['anonymously_used']['per'], - arc['cache_hits_by_cache_list']['anonymously_used']['num'], - ) - ) - sys.stdout.write("\t Most Recently Used:\t\t%s\t%s\n" % ( - arc['most_recently_used']['per'], - arc['most_recently_used']['num'], - ) - ) - sys.stdout.write("\t Most Frequently Used:\t\t%s\t%s\n" % ( - arc['most_frequently_used']['per'], - arc['most_frequently_used']['num'], - ) - ) - sys.stdout.write("\t Most Recently Used Ghost:\t%s\t%s\n" % ( - arc['most_recently_used_ghost']['per'], - arc['most_recently_used_ghost']['num'], - ) - ) - sys.stdout.write("\t Most Frequently Used Ghost:\t%s\t%s\n" % ( - arc['most_frequently_used_ghost']['per'], - arc['most_frequently_used_ghost']['num'], - ) - ) - - sys.stdout.write("\n\tCACHE HITS BY DATA TYPE:\n") - sys.stdout.write("\t Demand Data:\t\t\t%s\t%s\n" % ( - arc["cache_hits_by_data_type"]['demand_data']['per'], - arc["cache_hits_by_data_type"]['demand_data']['num'], - ) - ) - sys.stdout.write("\t Prefetch 
Data:\t\t%s\t%s\n" % ( - arc["cache_hits_by_data_type"]['prefetch_data']['per'], - arc["cache_hits_by_data_type"]['prefetch_data']['num'], - ) - ) - sys.stdout.write("\t Demand Metadata:\t\t%s\t%s\n" % ( - arc["cache_hits_by_data_type"]['demand_metadata']['per'], - arc["cache_hits_by_data_type"]['demand_metadata']['num'], - ) - ) - sys.stdout.write("\t Prefetch Metadata:\t\t%s\t%s\n" % ( - arc["cache_hits_by_data_type"]['prefetch_metadata']['per'], - arc["cache_hits_by_data_type"]['prefetch_metadata']['num'], - ) - ) - - sys.stdout.write("\n\tCACHE MISSES BY DATA TYPE:\n") - sys.stdout.write("\t Demand Data:\t\t\t%s\t%s\n" % ( - arc["cache_misses_by_data_type"]['demand_data']['per'], - arc["cache_misses_by_data_type"]['demand_data']['num'], - ) - ) - sys.stdout.write("\t Prefetch Data:\t\t%s\t%s\n" % ( - arc["cache_misses_by_data_type"]['prefetch_data']['per'], - arc["cache_misses_by_data_type"]['prefetch_data']['num'], - ) - ) - sys.stdout.write("\t Demand Metadata:\t\t%s\t%s\n" % ( - arc["cache_misses_by_data_type"]['demand_metadata']['per'], - arc["cache_misses_by_data_type"]['demand_metadata']['num'], - ) - ) - sys.stdout.write("\t Prefetch Metadata:\t\t%s\t%s\n" % ( - arc["cache_misses_by_data_type"]['prefetch_metadata']['per'], - arc["cache_misses_by_data_type"]['prefetch_metadata']['num'], - ) - ) - - -def get_l2arc_summary(Kstat): - """Collection information on the L2ARC""" - - output = {} - - l2_abort_lowmem = Kstat["kstat.zfs.misc.arcstats.l2_abort_lowmem"] - l2_cksum_bad = Kstat["kstat.zfs.misc.arcstats.l2_cksum_bad"] - l2_evict_lock_retry = Kstat["kstat.zfs.misc.arcstats.l2_evict_lock_retry"] - l2_evict_reading = Kstat["kstat.zfs.misc.arcstats.l2_evict_reading"] - l2_feeds = Kstat["kstat.zfs.misc.arcstats.l2_feeds"] - l2_free_on_write = Kstat["kstat.zfs.misc.arcstats.l2_free_on_write"] - l2_hdr_size = Kstat["kstat.zfs.misc.arcstats.l2_hdr_size"] - l2_hits = Kstat["kstat.zfs.misc.arcstats.l2_hits"] - l2_io_error = 
Kstat["kstat.zfs.misc.arcstats.l2_io_error"] - l2_misses = Kstat["kstat.zfs.misc.arcstats.l2_misses"] - l2_rw_clash = Kstat["kstat.zfs.misc.arcstats.l2_rw_clash"] - l2_size = Kstat["kstat.zfs.misc.arcstats.l2_size"] - l2_asize = Kstat["kstat.zfs.misc.arcstats.l2_asize"] - l2_writes_done = Kstat["kstat.zfs.misc.arcstats.l2_writes_done"] - l2_writes_error = Kstat["kstat.zfs.misc.arcstats.l2_writes_error"] - l2_writes_sent = Kstat["kstat.zfs.misc.arcstats.l2_writes_sent"] - l2_mfu_asize = Kstat["kstat.zfs.misc.arcstats.l2_mfu_asize"] - l2_mru_asize = Kstat["kstat.zfs.misc.arcstats.l2_mru_asize"] - l2_prefetch_asize = Kstat["kstat.zfs.misc.arcstats.l2_prefetch_asize"] - l2_bufc_data_asize = Kstat["kstat.zfs.misc.arcstats.l2_bufc_data_asize"] - l2_bufc_metadata_asize = Kstat["kstat.zfs.misc.arcstats.l2_bufc_metadata_asize"] - - l2_access_total = (l2_hits + l2_misses) - output['l2_health_count'] = (l2_writes_error + l2_cksum_bad + l2_io_error) - - output['l2_access_total'] = l2_access_total - output['l2_size'] = l2_size - output['l2_asize'] = l2_asize - - if l2_size > 0 and l2_access_total > 0: - - if output['l2_health_count'] > 0: - output["health"] = "DEGRADED" - else: - output["health"] = "HEALTHY" - - output["low_memory_aborts"] = fHits(l2_abort_lowmem) - output["free_on_write"] = fHits(l2_free_on_write) - output["rw_clashes"] = fHits(l2_rw_clash) - output["bad_checksums"] = fHits(l2_cksum_bad) - output["io_errors"] = fHits(l2_io_error) - - output["l2_arc_size"] = {} - output["l2_arc_size"]["adaptive"] = fBytes(l2_size) - output["l2_arc_size"]["actual"] = { - 'per': fPerc(l2_asize, l2_size), - 'num': fBytes(l2_asize) - } - output["l2_arc_size"]["head_size"] = { - 'per': fPerc(l2_hdr_size, l2_size), - 'num': fBytes(l2_hdr_size), - } - output["l2_arc_size"]["mfu_asize"] = { - 'per': fPerc(l2_mfu_asize, l2_asize), - 'num': fBytes(l2_mfu_asize), - } - output["l2_arc_size"]["mru_asize"] = { - 'per': fPerc(l2_mru_asize, l2_asize), - 'num': fBytes(l2_mru_asize), - } - 
output["l2_arc_size"]["prefetch_asize"] = { - 'per': fPerc(l2_prefetch_asize, l2_asize), - 'num': fBytes(l2_prefetch_asize), - } - output["l2_arc_size"]["bufc_data_asize"] = { - 'per': fPerc(l2_bufc_data_asize, l2_asize), - 'num': fBytes(l2_bufc_data_asize), - } - output["l2_arc_size"]["bufc_metadata_asize"] = { - 'per': fPerc(l2_bufc_metadata_asize, l2_asize), - 'num': fBytes(l2_bufc_metadata_asize), - } - - output["l2_arc_evicts"] = {} - output["l2_arc_evicts"]['lock_retries'] = fHits(l2_evict_lock_retry) - output["l2_arc_evicts"]['reading'] = fHits(l2_evict_reading) - - output['l2_arc_breakdown'] = {} - output['l2_arc_breakdown']['value'] = fHits(l2_access_total) - output['l2_arc_breakdown']['hit_ratio'] = { - 'per': fPerc(l2_hits, l2_access_total), - 'num': fHits(l2_hits), - } - output['l2_arc_breakdown']['miss_ratio'] = { - 'per': fPerc(l2_misses, l2_access_total), - 'num': fHits(l2_misses), - } - output['l2_arc_breakdown']['feeds'] = fHits(l2_feeds) - - output['l2_arc_buffer'] = {} - - output['l2_arc_writes'] = {} - output['l2_writes_done'] = l2_writes_done - output['l2_writes_sent'] = l2_writes_sent - if l2_writes_done != l2_writes_sent: - output['l2_arc_writes']['writes_sent'] = { - 'value': "FAULTED", - 'num': fHits(l2_writes_sent), - } - output['l2_arc_writes']['done_ratio'] = { - 'per': fPerc(l2_writes_done, l2_writes_sent), - 'num': fHits(l2_writes_done), - } - output['l2_arc_writes']['error_ratio'] = { - 'per': fPerc(l2_writes_error, l2_writes_sent), - 'num': fHits(l2_writes_error), - } - else: - output['l2_arc_writes']['writes_sent'] = { - 'per': fPerc(100), - 'num': fHits(l2_writes_sent), - } - - return output - - -def _l2arc_summary(Kstat): - """Print information on the L2ARC""" - - arc = get_l2arc_summary(Kstat) - - if arc['l2_size'] > 0 and arc['l2_access_total'] > 0: - sys.stdout.write("L2 ARC Summary: ") - if arc['l2_health_count'] > 0: - sys.stdout.write("(DEGRADED)\n") - else: - sys.stdout.write("(HEALTHY)\n") - sys.stdout.write("\tLow Memory 
Aborts:\t\t\t%s\n" % - arc['low_memory_aborts']) - sys.stdout.write("\tFree on Write:\t\t\t\t%s\n" % arc['free_on_write']) - sys.stdout.write("\tR/W Clashes:\t\t\t\t%s\n" % arc['rw_clashes']) - sys.stdout.write("\tBad Checksums:\t\t\t\t%s\n" % arc['bad_checksums']) - sys.stdout.write("\tIO Errors:\t\t\t\t%s\n" % arc['io_errors']) - sys.stdout.write("\n") - - sys.stdout.write("L2 ARC Size: (Adaptive)\t\t\t\t%s\n" % - arc["l2_arc_size"]["adaptive"]) - sys.stdout.write("\tCompressed:\t\t\t%s\t%s\n" % ( - arc["l2_arc_size"]["actual"]["per"], - arc["l2_arc_size"]["actual"]["num"], - ) - ) - sys.stdout.write("\tHeader Size:\t\t\t%s\t%s\n" % ( - arc["l2_arc_size"]["head_size"]["per"], - arc["l2_arc_size"]["head_size"]["num"], - ) - ) - sys.stdout.write("\tMFU Alloc. Size:\t\t%s\t%s\n" % ( - arc["l2_arc_size"]["mfu_asize"]["per"], - arc["l2_arc_size"]["mfu_asize"]["num"], - ) - ) - sys.stdout.write("\tMRU Alloc. Size:\t\t%s\t%s\n" % ( - arc["l2_arc_size"]["mru_asize"]["per"], - arc["l2_arc_size"]["mru_asize"]["num"], - ) - ) - sys.stdout.write("\tPrefetch Alloc. Size:\t\t%s\t%s\n" % ( - arc["l2_arc_size"]["prefetch_asize"]["per"], - arc["l2_arc_size"]["prefetch_asize"]["num"], - ) - ) - sys.stdout.write("\tData (buf content) Alloc. 
Size:\t%s\t%s\n" % ( - arc["l2_arc_size"]["bufc_data_asize"]["per"], - arc["l2_arc_size"]["bufc_data_asize"]["num"], - ) - ) - sys.stdout.write("\tMetadata (buf content) Size:\t%s\t%s\n" % ( - arc["l2_arc_size"]["bufc_metadata_asize"]["per"], - arc["l2_arc_size"]["bufc_metadata_asize"]["num"], - ) - ) - sys.stdout.write("\n") - - if arc["l2_arc_evicts"]['lock_retries'] != '0' or \ - arc["l2_arc_evicts"]["reading"] != '0': - sys.stdout.write("L2 ARC Evictions:\n") - sys.stdout.write("\tLock Retries:\t\t\t\t%s\n" % - arc["l2_arc_evicts"]['lock_retries']) - sys.stdout.write("\tUpon Reading:\t\t\t\t%s\n" % - arc["l2_arc_evicts"]["reading"]) - sys.stdout.write("\n") - - sys.stdout.write("L2 ARC Breakdown:\t\t\t\t%s\n" % - arc['l2_arc_breakdown']['value']) - sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % ( - arc['l2_arc_breakdown']['hit_ratio']['per'], - arc['l2_arc_breakdown']['hit_ratio']['num'], - ) - ) - - sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % ( - arc['l2_arc_breakdown']['miss_ratio']['per'], - arc['l2_arc_breakdown']['miss_ratio']['num'], - ) - ) - - sys.stdout.write("\tFeeds:\t\t\t\t\t%s\n" % - arc['l2_arc_breakdown']['feeds']) - sys.stdout.write("\n") - - sys.stdout.write("L2 ARC Writes:\n") - if arc['l2_writes_done'] != arc['l2_writes_sent']: - sys.stdout.write("\tWrites Sent: (%s)\t\t\t\t%s\n" % ( - arc['l2_arc_writes']['writes_sent']['value'], - arc['l2_arc_writes']['writes_sent']['num'], - ) - ) - sys.stdout.write("\t Done Ratio:\t\t\t%s\t%s\n" % ( - arc['l2_arc_writes']['done_ratio']['per'], - arc['l2_arc_writes']['done_ratio']['num'], - ) - ) - sys.stdout.write("\t Error Ratio:\t\t\t%s\t%s\n" % ( - arc['l2_arc_writes']['error_ratio']['per'], - arc['l2_arc_writes']['error_ratio']['num'], - ) - ) - else: - sys.stdout.write("\tWrites Sent:\t\t\t%s\t%s\n" % ( - arc['l2_arc_writes']['writes_sent']['per'], - arc['l2_arc_writes']['writes_sent']['num'], - ) - ) - - -def get_dmu_summary(Kstat): - """Collect information on the DMU""" - - output = {} - - 
zfetch_hits = Kstat["kstat.zfs.misc.zfetchstats.hits"] - zfetch_misses = Kstat["kstat.zfs.misc.zfetchstats.misses"] - - zfetch_access_total = (zfetch_hits + zfetch_misses) - output['zfetch_access_total'] = zfetch_access_total - - if zfetch_access_total > 0: - output['dmu'] = {} - output['dmu']['efficiency'] = {} - output['dmu']['efficiency']['value'] = fHits(zfetch_access_total) - output['dmu']['efficiency']['hit_ratio'] = { - 'per': fPerc(zfetch_hits, zfetch_access_total), - 'num': fHits(zfetch_hits), - } - output['dmu']['efficiency']['miss_ratio'] = { - 'per': fPerc(zfetch_misses, zfetch_access_total), - 'num': fHits(zfetch_misses), - } - - return output - - -def _dmu_summary(Kstat): - """Print information on the DMU""" - - arc = get_dmu_summary(Kstat) - - if arc['zfetch_access_total'] > 0: - sys.stdout.write("DMU Prefetch Efficiency:\t\t\t\t\t%s\n" % - arc['dmu']['efficiency']['value']) - sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % ( - arc['dmu']['efficiency']['hit_ratio']['per'], - arc['dmu']['efficiency']['hit_ratio']['num'], - ) - ) - sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % ( - arc['dmu']['efficiency']['miss_ratio']['per'], - arc['dmu']['efficiency']['miss_ratio']['num'], - ) - ) - - sys.stdout.write("\n") - - -def get_vdev_summary(Kstat): - """Collect information on the VDEVs""" - - output = {} - - vdev_cache_delegations = \ - Kstat["kstat.zfs.misc.vdev_cache_stats.delegations"] - vdev_cache_misses = Kstat["kstat.zfs.misc.vdev_cache_stats.misses"] - vdev_cache_hits = Kstat["kstat.zfs.misc.vdev_cache_stats.hits"] - vdev_cache_total = (vdev_cache_misses + vdev_cache_hits + - vdev_cache_delegations) - - output['vdev_cache_total'] = vdev_cache_total - - if vdev_cache_total > 0: - output['summary'] = fHits(vdev_cache_total) - output['hit_ratio'] = { - 'per': fPerc(vdev_cache_hits, vdev_cache_total), - 'num': fHits(vdev_cache_hits), - } - output['miss_ratio'] = { - 'per': fPerc(vdev_cache_misses, vdev_cache_total), - 'num': fHits(vdev_cache_misses), - 
} - output['delegations'] = { - 'per': fPerc(vdev_cache_delegations, vdev_cache_total), - 'num': fHits(vdev_cache_delegations), - } - - return output - - -def _vdev_summary(Kstat): - """Print information on the VDEVs""" - - arc = get_vdev_summary(Kstat) - - if arc['vdev_cache_total'] > 0: - sys.stdout.write("VDEV Cache Summary:\t\t\t\t%s\n" % arc['summary']) - sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % ( - arc['hit_ratio']['per'], - arc['hit_ratio']['num'], - )) - sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % ( - arc['miss_ratio']['per'], - arc['miss_ratio']['num'], - )) - sys.stdout.write("\tDelegations:\t\t\t%s\t%s\n" % ( - arc['delegations']['per'], - arc['delegations']['num'], - )) - - -def _tunable_summary(Kstat): - """Print information on tunables, including descriptions if requested""" - - global show_tunable_descriptions - global alternate_tunable_layout - - tunables = load_tunables() - descriptions = {} - - if show_tunable_descriptions: - - command = ["/sbin/modinfo", "zfs", "-0"] - - try: - p = Popen(command, stdin=PIPE, stdout=PIPE, - stderr=PIPE, shell=False, close_fds=True) - p.wait() - - # By default, Python 2 returns a string as the first element of the - # tuple from p.communicate(), while Python 3 returns bytes which - # must be decoded first. 
The better way to do this would be with - # subprocess.run() or at least .check_output(), but this fails on - # CentOS 6 because of its old version of Python 2 - desc = bytes.decode(p.communicate()[0]) - description_list = desc.strip().split('\0') - - if p.returncode == 0: - for tunable in description_list: - if tunable[0:5] == 'parm:': - tunable = tunable[5:].strip() - name, description = tunable.split(':', 1) - if not description: - description = "Description unavailable" - descriptions[name] = description - else: - sys.stderr.write("%s: '%s' exited with code %i\n" % - (sys.argv[0], command[0], p.returncode)) - sys.stderr.write("Tunable descriptions will be disabled.\n") - except OSError as e: - sys.stderr.write("%s: Cannot run '%s': %s\n" % - (sys.argv[0], command[0], e.strerror)) - sys.stderr.write("Tunable descriptions will be disabled.\n") - - sys.stdout.write("ZFS Tunables:\n") - - if alternate_tunable_layout: - fmt = "\t%s=%s\n" - else: - fmt = "\t%-50s%s\n" - - for name in sorted(tunables.keys()): - if show_tunable_descriptions and name in descriptions: - sys.stdout.write("\t# %s\n" % descriptions[name]) - - sys.stdout.write(fmt % (name, tunables[name])) - - -unSub = [ - _arc_summary, - _arc_efficiency, - _l2arc_summary, - _dmu_summary, - _vdev_summary, - _tunable_summary -] - - -def zfs_header(): - """Print title string with date""" - - daydate = time.strftime('%a %b %d %H:%M:%S %Y') - - sys.stdout.write('\n'+'-'*72+'\n') - sys.stdout.write('ZFS Subsystem Report\t\t\t\t%s' % daydate) - sys.stdout.write('\n') - - -def usage(): - """Print usage information""" - - sys.stdout.write("Usage: arc_summary [-h] [-a] [-d] [-p PAGE]\n\n") - sys.stdout.write("\t -h, --help : " - "Print this help message and exit\n") - sys.stdout.write("\t -a, --alternate : " - "Show an alternate sysctl layout\n") - sys.stdout.write("\t -d, --description : " - "Show the sysctl descriptions\n") - sys.stdout.write("\t -p PAGE, --page=PAGE : " - "Select a single output page to 
display,\n") - sys.stdout.write("\t " - "should be an integer between 1 and " + - str(len(unSub)) + "\n\n") - sys.stdout.write("Examples:\n") - sys.stdout.write("\tarc_summary -a\n") - sys.stdout.write("\tarc_summary -p 4\n") - sys.stdout.write("\tarc_summary -ad\n") - sys.stdout.write("\tarc_summary --page=2\n") - - -def main(): - """Main function""" - - global show_tunable_descriptions - global alternate_tunable_layout - - try: - try: - opts, args = getopt.getopt( - sys.argv[1:], - "adp:h", ["alternate", "description", "page=", "help"] - ) - except getopt.error as e: - sys.stderr.write("Error: %s\n" % e.msg) - usage() - sys.exit(1) - - args = {} - for opt, arg in opts: - if opt in ('-a', '--alternate'): - args['a'] = True - if opt in ('-d', '--description'): - args['d'] = True - if opt in ('-p', '--page'): - args['p'] = arg - if opt in ('-h', '--help'): - usage() - sys.exit(0) - - Kstat = get_Kstat() - - alternate_tunable_layout = 'a' in args - show_tunable_descriptions = 'd' in args - - pages = [] - - if 'p' in args: - try: - pages.append(unSub[int(args['p']) - 1]) - except IndexError: - sys.stderr.write('the argument to -p must be between 1 and ' + - str(len(unSub)) + '\n') - sys.exit(1) - else: - pages = unSub - - zfs_header() - for page in pages: - page(Kstat) - sys.stdout.write("\n") - except IOError as ex: - if (ex.errno == errno.EPIPE): - sys.exit(0) - raise - except KeyboardInterrupt: - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/sys/contrib/openzfs/cmd/arc_summary/arc_summary3 b/sys/contrib/openzfs/cmd/arc_summary/arc_summary3 index 301c485b34ba..9d0c2d30ddd6 100755 --- a/sys/contrib/openzfs/cmd/arc_summary/arc_summary3 +++ b/sys/contrib/openzfs/cmd/arc_summary/arc_summary3 @@ -191,21 +191,13 @@ elif sys.platform.startswith('linux'): # there, so we fall back on modinfo command = ["/sbin/modinfo", request, "-0"] - # The recommended way to do this is with subprocess.run(). 
However, - # some installed versions of Python are < 3.5, so we offer them - # the option of doing it the old way (for now) info = '' try: - if 'run' in dir(subprocess): - info = subprocess.run(command, stdout=subprocess.PIPE, - universal_newlines=True) - raw_output = info.stdout.split('\0') - else: - info = subprocess.check_output(command, - universal_newlines=True) - raw_output = info.split('\0') + info = subprocess.run(command, stdout=subprocess.PIPE, + check=True, universal_newlines=True) + raw_output = info.stdout.split('\0') except subprocess.CalledProcessError: print("Error: Descriptions not available", diff --git a/sys/contrib/openzfs/cmd/arcstat/arcstat.in b/sys/contrib/openzfs/cmd/arcstat/arcstat.in index 425e52d1f513..0128fd81759f 100755 --- a/sys/contrib/openzfs/cmd/arcstat/arcstat.in +++ b/sys/contrib/openzfs/cmd/arcstat/arcstat.in @@ -47,7 +47,7 @@ # @hdr is the array of fields that needs to be printed, so we # just iterate over this array and print the values using our pretty printer. # -# This script must remain compatible with Python 2.6+ and Python 3.4+. +# This script must remain compatible with Python 3.6+. # import sys diff --git a/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in b/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in index 82250353f5eb..b716a0c9749b 100755 --- a/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in +++ b/sys/contrib/openzfs/cmd/dbufstat/dbufstat.in @@ -27,7 +27,7 @@ # Copyright (C) 2013 Lawrence Livermore National Security, LLC. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # -# This script must remain compatible with Python 2.6+ and Python 3.4+. +# This script must remain compatible with and Python 3.6+. 
# import sys diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c index 35dd818ff80d..e148ae52dbf0 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c @@ -80,6 +80,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) char *path = NULL; uint_t c, children; nvlist_t **child; + uint64_t vdev_guid; /* * First iterate over any children. @@ -100,7 +101,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) &child, &children) == 0) { for (c = 0; c < children; c++) { if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { - gsp->gs_vdev_type = DEVICE_TYPE_L2ARC; + gsp->gs_vdev_type = DEVICE_TYPE_SPARE; return (B_TRUE); } } @@ -109,7 +110,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) &child, &children) == 0) { for (c = 0; c < children; c++) { if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { - gsp->gs_vdev_type = DEVICE_TYPE_SPARE; + gsp->gs_vdev_type = DEVICE_TYPE_L2ARC; return (B_TRUE); } } @@ -126,6 +127,21 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) &gsp->gs_vdev_expandtime); return (B_TRUE); } + /* + * Otherwise, on a vdev guid match, grab the devid and expansion + * time. The devid might be missing on removal since its not part + * of blkid cache and L2ARC VDEV does not contain pool guid in its + * blkid, so this is a special case for L2ARC VDEV. 
+ */ + else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL && + nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 && + gsp->gs_vdev_guid == vdev_guid) { + (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, + &gsp->gs_devid); + (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME, + &gsp->gs_vdev_expandtime); + return (B_TRUE); + } return (B_FALSE); } @@ -148,13 +164,13 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) /* * if a match was found then grab the pool guid */ - if (gsp->gs_vdev_guid) { + if (gsp->gs_vdev_guid && gsp->gs_devid) { (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &gsp->gs_pool_guid); } zpool_close(zhp); - return (gsp->gs_vdev_guid != 0); + return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0); } void @@ -195,11 +211,13 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) uint64_t pool_guid = 0, vdev_guid = 0; guid_search_t search = { 0 }; device_type_t devtype = DEVICE_TYPE_PRIMARY; + char *devid = NULL; class = "resource.fs.zfs.removed"; subclass = ""; (void) nvlist_add_string(payload, FM_CLASS, class); + (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid); (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid); (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid); @@ -209,20 +227,24 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2); /* + * If devid is missing but vdev_guid is available, find devid + * and pool_guid from vdev_guid. * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or * ZFS_EV_POOL_GUID may be missing so find them. 
*/ - if (pool_guid == 0 || vdev_guid == 0) { - if ((nvlist_lookup_string(nvl, DEV_IDENTIFIER, - &search.gs_devid) == 0) && - (zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search) - == 1)) { - if (pool_guid == 0) - pool_guid = search.gs_pool_guid; - if (vdev_guid == 0) - vdev_guid = search.gs_vdev_guid; - devtype = search.gs_vdev_type; - } + if (devid == NULL || pool_guid == 0 || vdev_guid == 0) { + if (devid == NULL) + search.gs_vdev_guid = vdev_guid; + else + search.gs_devid = devid; + zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); + if (devid == NULL) + devid = search.gs_devid; + if (pool_guid == 0) + pool_guid = search.gs_pool_guid; + if (vdev_guid == 0) + vdev_guid = search.gs_vdev_guid; + devtype = search.gs_vdev_type; } /* @@ -235,7 +257,9 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) search.gs_vdev_expandtime + 10 > tv.tv_sec) { zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' " "for recently expanded device '%s'", EC_DEV_REMOVE, - search.gs_devid); + devid); + fnvlist_free(payload); + free(event); goto out; } diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c index 1945c298e6bc..f67fd96af045 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c @@ -185,10 +185,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) uint64_t wholedisk = 0ULL; uint64_t offline = 0ULL, faulted = 0ULL; uint64_t guid = 0ULL; + uint64_t is_spare = 0; char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL; char rawpath[PATH_MAX], fullpath[PATH_MAX]; char devpath[PATH_MAX]; int ret; + int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; boolean_t is_sd = B_FALSE; boolean_t is_mpath_wholedisk = B_FALSE; uint_t c; @@ -214,6 +216,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted); (void) 
nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid); + (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_IS_SPARE, &is_spare); /* * Special case: @@ -304,11 +307,13 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) } } + if (is_spare) + online_flag |= ZFS_ONLINE_SPARE; + /* * Attempt to online the device. */ - if (zpool_vdev_online(zhp, fullpath, - ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 && + if (zpool_vdev_online(zhp, fullpath, online_flag, &newstate) == 0 && (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED)) { zed_log_msg(LOG_INFO, @@ -527,6 +532,7 @@ typedef struct dev_data { uint64_t dd_vdev_guid; uint64_t dd_new_vdev_guid; const char *dd_new_devid; + uint64_t dd_num_spares; } dev_data_t; static void @@ -537,6 +543,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) uint_t c, children; nvlist_t **child; uint64_t guid = 0; + uint64_t isspare = 0; /* * First iterate over any children. @@ -562,7 +569,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) } /* once a vdev was matched and processed there is nothing left to do */ - if (dp->dd_found) + if (dp->dd_found && dp->dd_num_spares == 0) return; (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &guid); @@ -612,6 +619,10 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) } } + if (dp->dd_found == B_TRUE && nvlist_lookup_uint64(nvl, + ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) + dp->dd_num_spares++; + (dp->dd_func)(zhp, nvl, dp->dd_islabeled); } @@ -672,7 +683,9 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data) } zpool_close(zhp); - return (dp->dd_found); /* cease iteration after a match */ + + /* cease iteration after a match */ + return (dp->dd_found && dp->dd_num_spares == 0); } /* diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c index f4063bea7378..b4794e31193f 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c +++ 
b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c @@ -75,6 +75,8 @@ typedef struct find_cbdata { uint64_t cb_guid; zpool_handle_t *cb_zhp; nvlist_t *cb_vdev; + uint64_t cb_vdev_guid; + uint64_t cb_num_spares; } find_cbdata_t; static int @@ -140,6 +142,64 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid) return (NULL); } +static int +remove_spares(zpool_handle_t *zhp, void *data) +{ + nvlist_t *config, *nvroot; + nvlist_t **spares; + uint_t nspares; + char *devname; + find_cbdata_t *cbp = data; + uint64_t spareguid = 0; + vdev_stat_t *vs; + unsigned int c; + + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { + zpool_close(zhp); + return (0); + } + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) != 0) { + zpool_close(zhp); + return (0); + } + + for (int i = 0; i < nspares; i++) { + if (nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, + &spareguid) == 0 && spareguid == cbp->cb_vdev_guid) { + devname = zpool_vdev_name(NULL, zhp, spares[i], + B_FALSE); + nvlist_lookup_uint64_array(spares[i], + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c); + if (vs->vs_state != VDEV_STATE_REMOVED && + zpool_vdev_remove_wanted(zhp, devname) == 0) + cbp->cb_num_spares++; + break; + } + } + + zpool_close(zhp); + return (0); +} + +/* + * Given a vdev guid, find and remove all spares associated with it. + */ +static int +find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid) +{ + find_cbdata_t cb; + + cb.cb_num_spares = 0; + cb.cb_vdev_guid = vdev_guid; + zpool_iter(zhdl, remove_spares, &cb); + + return (cb.cb_num_spares); +} + /* * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 
*/ @@ -315,6 +375,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, libzfs_handle_t *zhdl = zdp->zrd_hdl; boolean_t fault_device, degrade_device; boolean_t is_repair; + boolean_t l2arc = B_FALSE; + boolean_t spare = B_FALSE; char *scheme; nvlist_t *vdev = NULL; char *uuid; @@ -323,6 +385,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, boolean_t is_disk; vdev_aux_t aux; uint64_t state = 0; + vdev_stat_t *vs; + unsigned int c; fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class); @@ -339,10 +403,26 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, char *devtype; char *devname; + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + &devtype) == 0) { + if (strcmp(devtype, VDEV_TYPE_SPARE) == 0) + spare = B_TRUE; + else if (strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) + l2arc = B_TRUE; + } + + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) + return; + + if (spare) { + int nspares = find_and_remove_spares(zhdl, vdev_guid); + fmd_hdl_debug(hdl, "%d spares removed", nspares); + return; + } + if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, - &pool_guid) != 0 || - nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, - &vdev_guid) != 0) + &pool_guid) != 0) return; if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, @@ -351,13 +431,28 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); - /* Can't replace l2arc with a spare: offline the device */ - if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, - &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) { - fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname); - zpool_vdev_offline(zhp, devname, B_TRUE); - } else if (!fmd_prop_get_int32(hdl, "spare_on_remove") || - replace_with_spare(hdl, zhp, vdev) == B_FALSE) { + nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c); + + /* + * If state 
removed is requested for already removed vdev, + * its a loopback event from spa_async_remove(). Just + * ignore it. + */ + if (vs->vs_state == VDEV_STATE_REMOVED && + state == VDEV_STATE_REMOVED) + return; + + /* Remove the vdev since device is unplugged */ + if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { + int status = zpool_vdev_remove_wanted(zhp, devname); + fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" + ", ret:%d", devname, status); + } + + /* Replace the vdev with a spare if its not a l2arc */ + if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") || + replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { /* Could not handle with spare */ fmd_hdl_debug(hdl, "no spare for '%s'", devname); } diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 02b19e7163c1..2d81ef31c4ac 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -315,14 +315,14 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] <snapshot>\n")); case HELP_SEND: - return (gettext("\tsend [-DnPpRvLecwhb] [-[i|I] snapshot] " + return (gettext("\tsend [-DnPpRVvLecwhb] [-[i|I] snapshot] " "<snapshot>\n" - "\tsend [-DnvPLecw] [-i snapshot|bookmark] " + "\tsend [-DnVvPLecw] [-i snapshot|bookmark] " "<filesystem|volume|snapshot>\n" - "\tsend [-DnPpvLec] [-i bookmark|snapshot] " + "\tsend [-DnPpVvLec] [-i bookmark|snapshot] " "--redact <bookmark> <snapshot>\n" - "\tsend [-nvPe] -t <receive_resume_token>\n" - "\tsend [-Pnv] --saved filesystem\n")); + "\tsend [-nVvPe] -t <receive_resume_token>\n" + "\tsend [-PnVv] --saved filesystem\n")); case HELP_SET: return (gettext("\tset <property=value> ... 
" "<filesystem|volume|snapshot> ...\n")); @@ -3474,6 +3474,8 @@ print_header(list_cbdata_t *cb) boolean_t first = B_TRUE; boolean_t right_justify; + color_start(ANSI_BOLD); + for (; pl != NULL; pl = pl->pl_next) { if (!first) { (void) printf(" "); @@ -3500,10 +3502,32 @@ print_header(list_cbdata_t *cb) (void) printf("%-*s", (int)pl->pl_width, header); } + color_end(); + (void) printf("\n"); } /* + * Decides on the color that the avail value should be printed in. + * > 80% used = yellow + * > 90% used = red + */ +static const char * +zfs_list_avail_color(zfs_handle_t *zhp) +{ + uint64_t used = zfs_prop_get_int(zhp, ZFS_PROP_USED); + uint64_t avail = zfs_prop_get_int(zhp, ZFS_PROP_AVAILABLE); + int percentage = (int)((double)avail / MAX(avail + used, 1) * 100); + + if (percentage > 20) + return (NULL); + else if (percentage > 10) + return (ANSI_YELLOW); + else + return (ANSI_RED); +} + +/* * Given a dataset and a list of fields, print out all the properties according * to the described layout. */ @@ -3566,6 +3590,22 @@ print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) } /* + * zfs_list_avail_color() needs ZFS_PROP_AVAILABLE + USED + * - so we need another for() search for the USED part + * - when no colors wanted, we can skip the whole thing + */ + if (use_color() && pl->pl_prop == ZFS_PROP_AVAILABLE) { + zprop_list_t *pl2 = cb->cb_proplist; + for (; pl2 != NULL; pl2 = pl2->pl_next) { + if (pl2->pl_prop == ZFS_PROP_USED) { + color_start(zfs_list_avail_color(zhp)); + /* found it, no need for more loops */ + break; + } + } + } + + /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. 
@@ -3576,6 +3616,9 @@ print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) (void) printf("%*s", (int)pl->pl_width, propstr); else (void) printf("%-*s", (int)pl->pl_width, propstr); + + if (pl->pl_prop == ZFS_PROP_AVAILABLE) + color_end(); } (void) printf("\n"); @@ -4407,6 +4450,7 @@ zfs_do_send(int argc, char **argv) {"props", no_argument, NULL, 'p'}, {"parsable", no_argument, NULL, 'P'}, {"dedup", no_argument, NULL, 'D'}, + {"proctitle", no_argument, NULL, 'V'}, {"verbose", no_argument, NULL, 'v'}, {"dryrun", no_argument, NULL, 'n'}, {"large-block", no_argument, NULL, 'L'}, @@ -4421,7 +4465,7 @@ zfs_do_send(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, ":i:I:RsDpvnPLeht:cwbd:S", + while ((c = getopt_long(argc, argv, ":i:I:RsDpVvnPLeht:cwbd:S", long_options, NULL)) != -1) { switch (c) { case 'i': @@ -4456,6 +4500,9 @@ zfs_do_send(int argc, char **argv) case 'P': flags.parsable = B_TRUE; break; + case 'V': + flags.progressastitle = B_TRUE; + break; case 'v': flags.verbosity++; flags.progress = B_TRUE; @@ -4531,7 +4578,7 @@ zfs_do_send(int argc, char **argv) } } - if (flags.parsable && flags.verbosity == 0) + if ((flags.parsable || flags.progressastitle) && flags.verbosity == 0) flags.verbosity = 1; argc -= optind; @@ -8693,6 +8740,8 @@ main(int argc, char **argv) libzfs_print_on_error(g_zfs, B_TRUE); + zfs_setproctitle_init(argc, argv, environ); + /* * Many commands modify input strings for string parsing reasons. * We create a copy to protect the original argv. 
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 370d6b371a50..2311d4f046f6 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -4205,6 +4205,8 @@ print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width, unsigned int namewidth; const char *title; + color_start(ANSI_BOLD); + if (cb->cb_flags & IOS_ANYHISTO_M) { title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; } else if (cb->cb_vdev_names_count) { @@ -4238,6 +4240,8 @@ print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width, if (cb->vcdl != NULL) print_cmd_columns(cb->vcdl, 1); + color_end(); + printf("\n"); } @@ -4247,6 +4251,37 @@ print_iostat_header(iostat_cbdata_t *cb) print_iostat_header_impl(cb, 0, NULL); } +/* + * Prints a size string (i.e. 120M) with the suffix ("M") colored + * by order of magnitude. Uses column_size to add padding. + */ +static void +print_stat_color(const char *statbuf, unsigned int column_size) +{ + fputs(" ", stdout); + size_t len = strlen(statbuf); + while (len < column_size) { + fputc(' ', stdout); + column_size--; + } + if (*statbuf == '0') { + color_start(ANSI_GRAY); + fputc('0', stdout); + } else { + for (; *statbuf; statbuf++) { + if (*statbuf == 'K') color_start(ANSI_GREEN); + else if (*statbuf == 'M') color_start(ANSI_YELLOW); + else if (*statbuf == 'G') color_start(ANSI_RED); + else if (*statbuf == 'T') color_start(ANSI_BOLD_BLUE); + else if (*statbuf == 'P') color_start(ANSI_MAGENTA); + else if (*statbuf == 'E') color_start(ANSI_CYAN); + fputc(*statbuf, stdout); + if (--column_size <= 0) + break; + } + } + color_end(); +} /* * Display a single statistic. 
@@ -4262,7 +4297,7 @@ print_one_stat(uint64_t value, enum zfs_nicenum_format format, if (scripted) printf("\t%s", buf); else - printf(" %*s", column_size, buf); + print_stat_color(buf, column_size); } /* diff --git a/sys/contrib/openzfs/cmd/ztest/ztest.c b/sys/contrib/openzfs/cmd/ztest/ztest.c index fb4297478cf1..b7dc3fcc5e51 100644 --- a/sys/contrib/openzfs/cmd/ztest/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest/ztest.c @@ -1184,7 +1184,7 @@ ztest_kill(ztest_shared_t *zs) * See comment above spa_write_cachefile(). */ mutex_enter(&spa_namespace_lock); - spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); + spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE, B_FALSE); mutex_exit(&spa_namespace_lock); (void) kill(getpid(), SIGKILL); diff --git a/sys/contrib/openzfs/config/always-python.m4 b/sys/contrib/openzfs/config/always-python.m4 index 5f47df424c27..5a2008124f72 100644 --- a/sys/contrib/openzfs/config/always-python.m4 +++ b/sys/contrib/openzfs/config/always-python.m4 @@ -1,7 +1,6 @@ dnl # dnl # The majority of the python scripts are written to be compatible -dnl # with Python 2.6 and Python 3.4. Therefore, they may be installed -dnl # and used with either interpreter. This option is intended to +dnl # with Python 3.6. This option is intended to dnl # to provide a method to specify the default system version, and dnl # set the PYTHON environment variable accordingly. 
dnl # @@ -13,9 +12,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ [with_python=check]) AS_CASE([$with_python], - [check], [AC_CHECK_PROGS([PYTHON], [python3 python2], [:])], - [2*], [PYTHON="python${with_python}"], - [*python2*], [PYTHON="${with_python}"], + [check], [AC_CHECK_PROGS([PYTHON], [python3], [:])], [3*], [PYTHON="python${with_python}"], [*python3*], [PYTHON="${with_python}"], [no], [PYTHON=":"], @@ -23,8 +20,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ ) dnl # - dnl # Minimum supported Python versions for utilities: - dnl # Python 2.6 or Python 3.4 + dnl # Minimum supported Python versions for utilities: Python 3.6 dnl # AM_PATH_PYTHON([], [], [:]) AS_IF([test -z "$PYTHON_VERSION"], [ @@ -33,25 +29,16 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ PYTHON_MINOR=${PYTHON_VERSION#*\.} AS_CASE([$PYTHON_VERSION], - [2.*], [ - AS_IF([test $PYTHON_MINOR -lt 6], - [AC_MSG_ERROR("Python >= 2.6 is required")]) - ], [3.*], [ - AS_IF([test $PYTHON_MINOR -lt 4], - [AC_MSG_ERROR("Python >= 3.4 is required")]) + AS_IF([test $PYTHON_MINOR -lt 6], + [AC_MSG_ERROR("Python >= 3.6 is required")]) ], [:|2|3], [], [PYTHON_VERSION=3] ) AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) - AM_CONDITIONAL([USING_PYTHON_2], [test "x${PYTHON_VERSION%%\.*}" = x2]) - AM_CONDITIONAL([USING_PYTHON_3], [test "x${PYTHON_VERSION%%\.*}" = x3]) - - AM_COND_IF([USING_PYTHON_2], - [AC_SUBST([PYTHON_SHEBANG], [python2])], - [AC_SUBST([PYTHON_SHEBANG], [python3])]) + AC_SUBST([PYTHON_SHEBANG], [python3]) dnl # dnl # Request that packages be built for a specific Python version. diff --git a/sys/contrib/openzfs/config/always-pyzfs.m4 b/sys/contrib/openzfs/config/always-pyzfs.m4 index efea49f5f025..9b123b1b2db1 100644 --- a/sys/contrib/openzfs/config/always-pyzfs.m4 +++ b/sys/contrib/openzfs/config/always-pyzfs.m4 @@ -18,7 +18,7 @@ AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ ]) dnl # -dnl # Determines if pyzfs can be built, requires Python 2.7 or later. 
+dnl # Determines if pyzfs can be built, requires Python 3.6 or later. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ AC_ARG_ENABLE([pyzfs], @@ -72,12 +72,11 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ]) dnl # - dnl # Require python-devel libraries + dnl # Require python3-devel libraries dnl # AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [ AS_CASE([$PYTHON_VERSION], - [3.*], [PYTHON_REQUIRED_VERSION=">= '3.4.0'"], - [2.*], [PYTHON_REQUIRED_VERSION=">= '2.7.0'"], + [3.*], [PYTHON_REQUIRED_VERSION=">= '3.6.0'"], [AC_MSG_ERROR("Python $PYTHON_VERSION unknown")] ) diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4 index 462d6c6efa8e..28e5364581ea 100644 --- a/sys/contrib/openzfs/config/kernel-blkdev.m4 +++ b/sys/contrib/openzfs/config/kernel-blkdev.m4 @@ -104,6 +104,57 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [ ]) dnl # +dnl # bdev_kobj() is introduced from 5.12 +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ], [ + ZFS_LINUX_TEST_SRC([bdev_kobj], [ + #include <linux/fs.h> + #include <linux/blkdev.h> + #include <linux/kobject.h> + ], [ + struct block_device *bdev = NULL; + struct kobject *disk_kobj; + disk_kobj = bdev_kobj(bdev); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ], [ + AC_MSG_CHECKING([whether bdev_kobj() exists]) + ZFS_LINUX_TEST_RESULT([bdev_kobj], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDEV_KOBJ, 1, + [bdev_kobj() exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # part_to_dev() was removed in 5.12 +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV], [ + ZFS_LINUX_TEST_SRC([part_to_dev], [ + #include <linux/fs.h> + #include <linux/blkdev.h> + ], [ + struct hd_struct *p = NULL; + struct device *pdev; + pdev = part_to_dev(p); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV], [ + AC_MSG_CHECKING([whether part_to_dev() exists]) + ZFS_LINUX_TEST_RESULT([part_to_dev], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PART_TO_DEV, 1, + [part_to_dev() 
exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # dnl # 5.10 API, check_disk_change() is removed, in favor of dnl # bdev_check_media_change(), which doesn't force revalidation dnl # @@ -405,6 +456,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [ ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE + ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ + ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV ]) AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [ @@ -421,4 +474,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [ ZFS_AC_KERNEL_BLKDEV_BDEVNAME ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE + ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ + ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV ]) diff --git a/sys/contrib/openzfs/config/kernel-generic_io_acct.m4 b/sys/contrib/openzfs/config/kernel-generic_io_acct.m4 index a8a448c6fe96..a6a109004294 100644 --- a/sys/contrib/openzfs/config/kernel-generic_io_acct.m4 +++ b/sys/contrib/openzfs/config/kernel-generic_io_acct.m4 @@ -2,7 +2,20 @@ dnl # dnl # Check for generic io accounting interface. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ - ZFS_LINUX_TEST_SRC([bdev_io_acct], [ + ZFS_LINUX_TEST_SRC([bdev_io_acct_63], [ + #include <linux/blkdev.h> + ], [ + struct block_device *bdev = NULL; + struct bio *bio = NULL; + unsigned long passed_time = 0; + unsigned long start_time; + + start_time = bdev_start_io_acct(bdev, bio_op(bio), + passed_time); + bdev_end_io_acct(bdev, bio_op(bio), bio_sectors(bio), start_time); + ]) + + ZFS_LINUX_TEST_SRC([bdev_io_acct_old], [ #include <linux/blkdev.h> ], [ struct block_device *bdev = NULL; @@ -63,74 +76,85 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ dnl # - dnl # 5.19 API, + dnl # Linux 6.3, and then backports thereof, changed + dnl # the signatures on bdev_start_io_acct/bdev_end_io_acct dnl # - dnl # disk_start_io_acct() and disk_end_io_acct() have been replaced by - dnl # bdev_start_io_acct() and bdev_end_io_acct(). 
- dnl # - AC_MSG_CHECKING([whether generic bdev_*_io_acct() are available]) - ZFS_LINUX_TEST_RESULT([bdev_io_acct], [ + AC_MSG_CHECKING([whether 6.3+ bdev_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([bdev_io_acct_63], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BDEV_IO_ACCT, 1, [bdev_*_io_acct() available]) + AC_DEFINE(HAVE_BDEV_IO_ACCT_63, 1, [bdev_*_io_acct() available]) ], [ AC_MSG_RESULT(no) dnl # - dnl # 5.12 API, + dnl # 5.19 API, dnl # - dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported - dnl # so use disk_start_io_acct() and disk_end_io_acct() instead + dnl # disk_start_io_acct() and disk_end_io_acct() have been replaced by + dnl # bdev_start_io_acct() and bdev_end_io_acct(). dnl # - AC_MSG_CHECKING([whether generic disk_*_io_acct() are available]) - ZFS_LINUX_TEST_RESULT([disk_io_acct], [ + AC_MSG_CHECKING([whether pre-6.3 bdev_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([bdev_io_acct_old], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available]) + AC_DEFINE(HAVE_BDEV_IO_ACCT_OLD, 1, [bdev_*_io_acct() available]) ], [ AC_MSG_RESULT(no) - dnl # - dnl # 5.7 API, + dnl # 5.12 API, dnl # - dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers. + dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported + dnl # so use disk_start_io_acct() and disk_end_io_acct() instead dnl # - AC_MSG_CHECKING([whether generic bio_*_io_acct() are available]) - ZFS_LINUX_TEST_RESULT([bio_io_acct], [ + AC_MSG_CHECKING([whether generic disk_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([disk_io_acct], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available]) + AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available]) ], [ AC_MSG_RESULT(no) dnl # - dnl # 4.14 API, + dnl # 5.7 API, dnl # - dnl # generic_start_io_acct/generic_end_io_acct now require - dnl # request_queue to be provided. No functional changes, - dnl # but preparation for inflight accounting. 
+ dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers. dnl # - AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args], - [generic_start_io_acct], [block/bio.c], [ + AC_MSG_CHECKING([whether generic bio_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([bio_io_acct], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, - [generic_*_io_acct() 4 arg available]) + AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available]) ], [ AC_MSG_RESULT(no) dnl # - dnl # 3.19 API addition + dnl # 4.14 API, dnl # - dnl # torvalds/linux@394ffa50 allows us to increment - dnl # iostat counters without generic_make_request(). + dnl # generic_start_io_acct/generic_end_io_acct now require + dnl # request_queue to be provided. No functional changes, + dnl # but preparation for inflight accounting. dnl # - AC_MSG_CHECKING( - [whether generic_*_io_acct wants 3 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], + AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args]) + ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args], [generic_start_io_acct], [block/bio.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, - [generic_*_io_acct() 3 arg available]) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, + [generic_*_io_acct() 4 arg available]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # 3.19 API addition + dnl # + dnl # torvalds/linux@394ffa50 allows us to increment + dnl # iostat counters without generic_make_request(). 
+ dnl # + AC_MSG_CHECKING( + [whether generic_*_io_acct wants 3 args]) + ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], + [generic_start_io_acct], [block/bio.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, + [generic_*_io_acct() 3 arg available]) + ], [ + AC_MSG_RESULT(no) + ]) ]) ]) ]) diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4 index 1bd2c9259766..2ab6765c3a30 100644 --- a/sys/contrib/openzfs/config/zfs-build.m4 +++ b/sys/contrib/openzfs/config/zfs-build.m4 @@ -518,6 +518,8 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ VENDOR=alpine ; elif test -f /bin/freebsd-version ; then VENDOR=freebsd ; + elif test -f /etc/openEuler-release ; then + VENDOR=openeuler ; else VENDOR= ; fi], @@ -542,6 +544,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ ubuntu) DEFAULT_PACKAGE=deb ;; debian) DEFAULT_PACKAGE=deb ;; freebsd) DEFAULT_PACKAGE=pkg ;; + openeuler) DEFAULT_PACKAGE=rpm ;; *) DEFAULT_PACKAGE=rpm ;; esac AC_MSG_RESULT([$DEFAULT_PACKAGE]) @@ -555,35 +558,14 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_RESULT([$initdir]) AC_SUBST(initdir) - AC_MSG_CHECKING([default init script type and shell]) + AC_MSG_CHECKING([default shell]) case "$VENDOR" in - toss) DEFAULT_INIT_SCRIPT=redhat ;; - redhat) DEFAULT_INIT_SCRIPT=redhat ;; - fedora) DEFAULT_INIT_SCRIPT=fedora ;; - gentoo) DEFAULT_INIT_SCRIPT=openrc ;; - alpine) DEFAULT_INIT_SCRIPT=openrc ;; - arch) DEFAULT_INIT_SCRIPT=lsb ;; - sles) DEFAULT_INIT_SCRIPT=lsb ;; - slackware) DEFAULT_INIT_SCRIPT=lsb ;; - lunar) DEFAULT_INIT_SCRIPT=lunar ;; - ubuntu) DEFAULT_INIT_SCRIPT=lsb ;; - debian) DEFAULT_INIT_SCRIPT=lsb ;; - freebsd) DEFAULT_INIT_SCRIPT=freebsd;; - *) DEFAULT_INIT_SCRIPT=lsb ;; + gentoo) DEFAULT_INIT_SHELL="/sbin/openrc-run";; + alpine) DEFAULT_INIT_SHELL="/sbin/openrc-run";; + *) DEFAULT_INIT_SHELL="/bin/sh" ;; esac - # On gentoo, it's possible that OpenRC isn't installed. Check if - # /sbin/openrc-run exists, and if not, fall back to generic defaults. 
- - DEFAULT_INIT_SHELL="/bin/sh" - AS_IF([test "$DEFAULT_INIT_SCRIPT" = "openrc"], [ - AS_IF([test -x "/sbin/openrc-run"], - [DEFAULT_INIT_SHELL="/sbin/openrc-run"], - [DEFAULT_INIT_SCRIPT=lsb]) - ]) - - AC_MSG_RESULT([$DEFAULT_INIT_SCRIPT:$DEFAULT_INIT_SHELL]) - AC_SUBST(DEFAULT_INIT_SCRIPT) + AC_MSG_RESULT([$DEFAULT_INIT_SHELL]) AC_SUBST(DEFAULT_INIT_SHELL) AC_MSG_CHECKING([default nfs server init script]) @@ -602,6 +584,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ redhat) initconfdir=/etc/sysconfig ;; fedora) initconfdir=/etc/sysconfig ;; sles) initconfdir=/etc/sysconfig ;; + openeuler) initconfdir=/etc/sysconfig ;; ubuntu) initconfdir=/etc/default ;; debian) initconfdir=/etc/default ;; freebsd) initconfdir=$sysconfdir/rc.conf.d;; diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/Makefile.am b/sys/contrib/openzfs/contrib/dracut/90zfs/Makefile.am index 3f7050300994..5a68e01f05c3 100644 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/Makefile.am +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/Makefile.am @@ -15,6 +15,7 @@ pkgdracut_SCRIPTS = \ pkgdracut_DATA = \ zfs-env-bootfs.service \ + zfs-nonroot-necessities.service \ zfs-snapshot-bootfs.service \ zfs-rollback-bootfs.service diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in index 4ac302de507c..a247e2ad2e94 100755 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in @@ -84,6 +84,9 @@ install() { inst_simple "${moddir}/zfs-env-bootfs.service" "${systemdsystemunitdir}/zfs-env-bootfs.service" systemctl -q --root "${initdir}" add-wants zfs-import.target zfs-env-bootfs.service + inst_simple "${moddir}/zfs-nonroot-necessities.service" "${systemdsystemunitdir}/zfs-nonroot-necessities.service" + systemctl -q --root "${initdir}" add-requires initrd-root-fs.target zfs-nonroot-necessities.service + for _service in \ "zfs-import-scan.service" \ 
"zfs-import-cache.service"; do diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in index 34c88037cac2..7ebab4c1a58d 100644 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in @@ -1,6 +1,5 @@ [Unit] -Description=Set BOOTFS environment for dracut -Documentation=man:zpool(8) +Description=Set BOOTFS and BOOTFSFLAGS environment variables for dracut DefaultDependencies=no After=zfs-import-cache.service After=zfs-import-scan.service @@ -8,7 +7,17 @@ Before=zfs-import.target [Service] Type=oneshot -ExecStart=/bin/sh -c "exec systemctl set-environment BOOTFS=$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)" +ExecStart=/bin/sh -c ' \ + . /lib/dracut-zfs-lib.sh; \ + decode_root_args || exit 0; \ + [ "$root" = "zfs:AUTO" ] && root="$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)"; \ + rootflags="$(getarg rootflags=)"; \ + case ",$rootflags," in \ + *,zfsutil,*) ;; \ + ,,) rootflags=zfsutil ;; \ + *) rootflags="zfsutil,$rootflags" ;; \ + esac; \ + exec systemctl set-environment BOOTFS="$root" BOOTFSFLAGS="$rootflags"' [Install] WantedBy=zfs-import.target diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-generator.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-generator.sh.in index 56f7ca9785ba..4e1eb7490e0d 100755 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-generator.sh.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-generator.sh.in @@ -14,81 +14,24 @@ GENERATOR_DIR="$1" . 
/lib/dracut-zfs-lib.sh decode_root_args || exit 0 -[ -z "${rootflags}" ] && rootflags=$(getarg rootflags=) -case ",${rootflags}," in - *,zfsutil,*) ;; - ,,) rootflags=zfsutil ;; - *) rootflags="zfsutil,${rootflags}" ;; -esac - [ -n "$debug" ] && echo "zfs-generator: writing extension for sysroot.mount to $GENERATOR_DIR/sysroot.mount.d/zfs-enhancement.conf" >> /dev/kmsg -mkdir -p "$GENERATOR_DIR"/sysroot.mount.d "$GENERATOR_DIR"/initrd-root-fs.target.requires "$GENERATOR_DIR"/dracut-pre-mount.service.d +mkdir -p "$GENERATOR_DIR"/sysroot.mount.d "$GENERATOR_DIR"/dracut-pre-mount.service.d + { echo "[Unit]" echo "Before=initrd-root-fs.target" echo "After=zfs-import.target" echo echo "[Mount]" - if [ "${root}" = "zfs:AUTO" ]; then - echo "PassEnvironment=BOOTFS" - echo 'What=${BOOTFS}' - else - echo "What=${root}" - fi + echo "PassEnvironment=BOOTFS BOOTFSFLAGS" + echo 'What=${BOOTFS}' echo "Type=zfs" - echo "Options=${rootflags}" + echo 'Options=${BOOTFSFLAGS}' } > "$GENERATOR_DIR"/sysroot.mount.d/zfs-enhancement.conf ln -fs ../sysroot.mount "$GENERATOR_DIR"/initrd-root-fs.target.requires/sysroot.mount - -if [ "${root}" = "zfs:AUTO" ]; then - { - echo "[Unit]" - echo "Before=initrd-root-fs.target" - echo "After=sysroot.mount" - echo "DefaultDependencies=no" - echo - echo "[Service]" - echo "Type=oneshot" - echo "PassEnvironment=BOOTFS" - echo "ExecStart=/bin/sh -c '" ' \ - . /lib/dracut-zfs-lib.sh; \ - _zfs_nonroot_necessities_cb() { \ - zfs mount | grep -m1 -q "^$1 " && return 0; \ - echo "Mounting $1 on /sysroot$2"; \ - mount -o zfsutil -t zfs "$1" "/sysroot$2"; \ - }; \ - for_relevant_root_children "${BOOTFS}" _zfs_nonroot_necessities_cb;' \ - "'" - } > "$GENERATOR_DIR"/zfs-nonroot-necessities.service - ln -fs ../zfs-nonroot-necessities.service "$GENERATOR_DIR"/initrd-root-fs.target.requires/zfs-nonroot-necessities.service -else - # We can solve this statically at generation time, so do! 
- _zfs_generator_cb() { - dset="${1}" - mpnt="${2}" - unit="$(systemd-escape --suffix=mount -p "/sysroot${mpnt}")" - - { - echo "[Unit]" - echo "Before=initrd-root-fs.target" - echo "After=sysroot.mount" - echo - echo "[Mount]" - echo "Where=/sysroot${mpnt}" - echo "What=${dset}" - echo "Type=zfs" - echo "Options=zfsutil" - } > "$GENERATOR_DIR/${unit}" - ln -fs ../"${unit}" "$GENERATOR_DIR"/initrd-root-fs.target.requires/"${unit}" - } - - for_relevant_root_children "${root}" _zfs_generator_cb -fi - - { echo "[Unit]" echo "After=zfs-import.target" diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-lib.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-lib.sh.in index a91b56ba7f3c..171616bce072 100755 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-lib.sh.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-lib.sh.in @@ -38,7 +38,7 @@ mount_dataset() { # for_relevant_root_children DATASET EXEC # Runs "EXEC dataset mountpoint" for all children of DATASET that are needed for system bringup -# Used by zfs-generator.sh and friends, too! +# Used by zfs-nonroot-necessities.service and friends, too! for_relevant_root_children() { dataset="${1}" exec="${2}" diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-nonroot-necessities.service.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-nonroot-necessities.service.in new file mode 100644 index 000000000000..8f420c737c72 --- /dev/null +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-nonroot-necessities.service.in @@ -0,0 +1,20 @@ +[Unit] +Before=initrd-root-fs.target +After=sysroot.mount +DefaultDependencies=no +ConditionEnvironment=BOOTFS + +[Service] +Type=oneshot +PassEnvironment=BOOTFS +ExecStart=/bin/sh -c ' \ + . 
/lib/dracut-zfs-lib.sh; \ + _zfs_nonroot_necessities_cb() { \ + @sbindir@/zfs mount | grep -m1 -q "^$1 " && return 0; \ + echo "Mounting $1 on /sysroot$2"; \ + mount -o zfsutil -t zfs "$1" "/sysroot$2"; \ + }; \ + for_relevant_root_children "${BOOTFS}" _zfs_nonroot_necessities_cb' + +[Install] +RequiredBy=initrd-root-fs.target diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in index a29cf3a3dd81..68fdcb1f323e 100644 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in @@ -5,8 +5,9 @@ After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service Before=dracut-mount.service DefaultDependencies=no ConditionKernelCommandLine=bootfs.rollback +ConditionEnvironment=BOOTFS [Service] Type=oneshot -ExecStart=/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS"; SNAPNAME="$(getarg bootfs.rollback)"; exec @sbindir@/zfs rollback -Rf "$root@${SNAPNAME:-%v}"' +ExecStart=/bin/sh -c '. /lib/dracut-lib.sh; SNAPNAME="$(getarg bootfs.rollback)"; exec @sbindir@/zfs rollback -Rf "$BOOTFS@${SNAPNAME:-%v}"' RemainAfterExit=yes diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in index 9e73d1a78724..a675b5b2ea98 100644 --- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in +++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in @@ -5,8 +5,9 @@ After=zfs-import.target dracut-pre-mount.service Before=dracut-mount.service DefaultDependencies=no ConditionKernelCommandLine=bootfs.snapshot +ConditionEnvironment=BOOTFS [Service] Type=oneshot -ExecStart=-/bin/sh -c '. 
/lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS"; SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$root@${SNAPNAME:-%v}"' +ExecStart=-/bin/sh -c '. /lib/dracut-lib.sh; SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$BOOTFS@${SNAPNAME:-%v}"' RemainAfterExit=yes diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/Makefile.am b/sys/contrib/openzfs/contrib/initramfs/scripts/Makefile.am index 444a5f374bfe..5bcbfb96b2a4 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/Makefile.am +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/Makefile.am @@ -7,5 +7,6 @@ dist_scripts_SCRIPTS = \ SUBDIRS = local-top +SHELLCHECK_IGNORE = ,SC2295 SHELLCHECKDIRS = $(SUBDIRS) SHELLCHECK_SHELL = sh diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index e25ce689541e..4ce739fda704 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -192,7 +192,7 @@ import_pool() # Verify that the pool isn't already imported # Make as sure as we can to not require '-f' to import. - "${ZPOOL}" get name,guid -o value -H 2>/dev/null | grep -Fxq "$pool" && return 0 + "${ZPOOL}" get -H -o value name,guid 2>/dev/null | grep -Fxq "$pool" && return 0 # For backwards compatibility, make sure that ZPOOL_IMPORT_PATH is set # to something we can use later with the real import(s). We want to @@ -341,7 +341,10 @@ mount_fs() # isn't the root fs. return 0 fi - ZFS_CMD="mount.zfs" + # Don't use mount.zfs -o zfsutils for legacy mountpoint + if [ "$mountpoint" = "legacy" ]; then + ZFS_CMD="mount.zfs" + fi # Last hail-mary: Hope 'rootmnt' is set! 
mountpoint="" else @@ -880,12 +883,12 @@ mountroot() fi # In case the pool was specified as guid, resolve guid to name - pool="$("${ZPOOL}" get name,guid -o name,value -H | \ + pool="$("${ZPOOL}" get -H -o name,value name,guid | \ awk -v pool="${ZFS_RPOOL}" '$2 == pool { print $1 }')" if [ -n "$pool" ]; then # If $ZFS_BOOTFS contains guid, replace the guid portion with $pool ZFS_BOOTFS=$(echo "$ZFS_BOOTFS" | \ - sed -e "s/$("${ZPOOL}" get guid -o value "$pool" -H)/$pool/g") + sed -e "s/$("${ZPOOL}" get -H -o value guid "$pool")/$pool/g") ZFS_RPOOL="${pool}" fi diff --git a/sys/contrib/openzfs/contrib/pyzfs/setup.py.in b/sys/contrib/openzfs/contrib/pyzfs/setup.py.in index bd8ffc728fa6..934b3189ebe1 100644 --- a/sys/contrib/openzfs/contrib/pyzfs/setup.py.in +++ b/sys/contrib/openzfs/contrib/pyzfs/setup.py.in @@ -29,13 +29,12 @@ setup( "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: System :: Filesystems", "Topic :: Software Development :: Libraries", ], @@ -53,7 +52,7 @@ setup( setup_requires=[ "cffi", ], - python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,<4', + python_requires='>=3.6,<4', zip_safe=False, test_suite="libzfs_core.test", ) diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index 98942b41982c..214a188f9474 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -307,6 +307,7 @@ extern int zpool_vdev_remove_cancel(zpool_handle_t *); extern int 
zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); +_LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); @@ -692,6 +693,9 @@ typedef struct sendflags { /* show progress (ie. -v) */ boolean_t progress; + /* show progress as process title (ie. -V) */ + boolean_t progressastitle; + /* large blocks (>128K) are permitted */ boolean_t largeblock; diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h index f63a1fa8c8f0..15024a4e8888 100644 --- a/sys/contrib/openzfs/include/libzutil.h +++ b/sys/contrib/openzfs/include/libzutil.h @@ -150,17 +150,33 @@ int zfs_ioctl_fd(int fd, unsigned long request, struct zfs_cmd *zc); /* * List of colors to use */ +#define ANSI_BLACK "\033[0;30m" #define ANSI_RED "\033[0;31m" #define ANSI_GREEN "\033[0;32m" #define ANSI_YELLOW "\033[0;33m" #define ANSI_BLUE "\033[0;34m" +#define ANSI_BOLD_BLUE "\033[1;34m" /* light blue */ +#define ANSI_MAGENTA "\033[0;35m" +#define ANSI_CYAN "\033[0;36m" +#define ANSI_GRAY "\033[0;37m" + #define ANSI_RESET "\033[0m" #define ANSI_BOLD "\033[1m" +int use_color(void); void color_start(const char *color); void color_end(void); int printf_color(const char *color, char *format, ...); +#ifdef __linux__ +extern char **environ; +_LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]); +_LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); +#else +#define zfs_setproctitle(fmt, ...) setproctitle(fmt, ##__VA_ARGS__) +#define zfs_setproctitle_init(x, y, z) ((void)0) +#endif + /* * These functions are used by the ZFS libraries and cmd/zpool code, but are * not exported in the ABI. 
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h index bac5c2279d29..02a269a89fff 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h @@ -257,6 +257,32 @@ bio_set_bi_error(struct bio *bio, int error) #endif /* HAVE_1ARG_BIO_END_IO_T */ /* + * 5.15 MACRO, + * GD_DEAD + * + * 2.6.36 - 5.14 MACRO, + * GENHD_FL_UP + * + * Check the disk status and return B_TRUE if alive + * otherwise B_FALSE + */ +static inline boolean_t +zfs_check_disk_status(struct block_device *bdev) +{ +#if defined(GENHD_FL_UP) + return (!!(bdev->bd_disk->flags & GENHD_FL_UP)); +#elif defined(GD_DEAD) + return (!test_bit(GD_DEAD, &bdev->bd_disk->state)); +#else +/* + * This is encountered if neither GENHD_FL_UP nor GD_DEAD is available in + * the kernel - likely due to an MACRO change that needs to be chased down. + */ +#error "Unsupported kernel: no usable disk status check" +#endif +} + +/* * 4.1 API, * 3.10.0 CentOS 7.x API, * blkdev_reread_part() @@ -389,7 +415,7 @@ static inline void bio_set_flush(struct bio *bio) { #if defined(HAVE_REQ_PREFLUSH) /* >= 4.10 */ - bio_set_op_attrs(bio, 0, REQ_PREFLUSH); + bio_set_op_attrs(bio, 0, REQ_PREFLUSH | REQ_OP_WRITE); #elif defined(WRITE_FLUSH_FUA) /* >= 2.6.37 and <= 4.9 */ bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA); #else @@ -555,7 +581,10 @@ blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)), struct gendisk *disk __attribute__((unused)), int rw __attribute__((unused)), struct bio *bio) { -#if defined(HAVE_BDEV_IO_ACCT) +#if defined(HAVE_BDEV_IO_ACCT_63) + return (bdev_start_io_acct(bio->bi_bdev, bio_op(bio), + jiffies)); +#elif defined(HAVE_BDEV_IO_ACCT_OLD) return (bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio), jiffies)); #elif defined(HAVE_DISK_IO_ACCT) @@ -581,7 +610,10 @@ blk_generic_end_io_acct(struct request_queue 
*q __attribute__((unused)), struct gendisk *disk __attribute__((unused)), int rw __attribute__((unused)), struct bio *bio, unsigned long start_time) { -#if defined(HAVE_BDEV_IO_ACCT) +#if defined(HAVE_BDEV_IO_ACCT_63) + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), bio_sectors(bio), + start_time); +#elif defined(HAVE_BDEV_IO_ACCT_OLD) bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); #elif defined(HAVE_DISK_IO_ACCT) disk_end_io_acct(disk, bio_op(bio), start_time); diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/Makefile.am b/sys/contrib/openzfs/include/os/linux/spl/sys/Makefile.am index 48c27f970fc9..450baffc395e 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/Makefile.am +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/Makefile.am @@ -20,6 +20,7 @@ KERNEL_H = \ kmem.h \ kstat.h \ list.h \ + misc.h \ mod_os.h \ mutex.h \ param.h \ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h b/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h new file mode 100644 index 000000000000..299fe9c1ab07 --- /dev/null +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h @@ -0,0 +1,29 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _OS_LINUX_SPL_MISC_H +#define _OS_LINUX_SPL_MISC_H + +#include <linux/kobject.h> + +extern void spl_signal_kobj_evt(struct block_device *bdev); + +#endif diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h index 43818d4104c4..db6238fda61e 100644 --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -30,6 +30,7 @@ #define _SYS_ARC_IMPL_H #include <sys/arc.h> +#include <sys/multilist.h> #include <sys/zio_crypt.h> #include <sys/zthr.h> #include <sys/aggsum.h> diff --git a/sys/contrib/openzfs/include/sys/bpobj.h b/sys/contrib/openzfs/include/sys/bpobj.h index 16e403526cff..2bca0a82e5eb 100644 --- a/sys/contrib/openzfs/include/sys/bpobj.h +++ b/sys/contrib/openzfs/include/sys/bpobj.h @@ -87,6 +87,7 @@ int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, int64_t start); void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx); +void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj); void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx); diff --git a/sys/contrib/openzfs/include/sys/btree.h b/sys/contrib/openzfs/include/sys/btree.h index a901d654ef1c..883abb5181c9 100644 --- a/sys/contrib/openzfs/include/sys/btree.h +++ b/sys/contrib/openzfs/include/sys/btree.h @@ -65,7 +65,7 @@ extern "C" { * them, and increased memory overhead. Increasing these values results in * higher variance in operation time, and reduces memory overhead. 
*/ -#define BTREE_CORE_ELEMS 128 +#define BTREE_CORE_ELEMS 126 #define BTREE_LEAF_SIZE 4096 extern kmem_cache_t *zfs_btree_leaf_cache; @@ -95,9 +95,6 @@ typedef struct zfs_btree_leaf { uint8_t btl_elems[]; } zfs_btree_leaf_t; -#define BTREE_LEAF_ESIZE (BTREE_LEAF_SIZE - \ - offsetof(zfs_btree_leaf_t, btl_elems)) - typedef struct zfs_btree_index { zfs_btree_hdr_t *bti_node; uint32_t bti_offset; @@ -109,14 +106,15 @@ typedef struct zfs_btree_index { } zfs_btree_index_t; typedef struct btree { - zfs_btree_hdr_t *bt_root; - int64_t bt_height; + int (*bt_compar) (const void *, const void *); size_t bt_elem_size; + size_t bt_leaf_size; uint32_t bt_leaf_cap; + int32_t bt_height; uint64_t bt_num_elems; uint64_t bt_num_nodes; + zfs_btree_hdr_t *bt_root; zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading - int (*bt_compar) (const void *, const void *); } zfs_btree_t; /* @@ -132,9 +130,12 @@ void zfs_btree_fini(void); * compar - function to compare two nodes, it must return exactly: -1, 0, or +1 * -1 for <, 0 for ==, and +1 for > * size - the value of sizeof(struct my_type) + * lsize - custom leaf size */ void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *), size_t); +void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *), + size_t, size_t); /* * Find a node with a matching value in the tree. 
Returns the matching node diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h index bae393eeba0c..20b7c2aaf2be 100644 --- a/sys/contrib/openzfs/include/sys/dnode.h +++ b/sys/contrib/openzfs/include/sys/dnode.h @@ -36,6 +36,7 @@ #include <sys/dmu_zfetch.h> #include <sys/zrlock.h> #include <sys/multilist.h> +#include <sys/wmsum.h> #ifdef __cplusplus extern "C" { @@ -587,10 +588,42 @@ typedef struct dnode_stats { kstat_named_t dnode_move_active; } dnode_stats_t; +typedef struct dnode_sums { + wmsum_t dnode_hold_dbuf_hold; + wmsum_t dnode_hold_dbuf_read; + wmsum_t dnode_hold_alloc_hits; + wmsum_t dnode_hold_alloc_misses; + wmsum_t dnode_hold_alloc_interior; + wmsum_t dnode_hold_alloc_lock_retry; + wmsum_t dnode_hold_alloc_lock_misses; + wmsum_t dnode_hold_alloc_type_none; + wmsum_t dnode_hold_free_hits; + wmsum_t dnode_hold_free_misses; + wmsum_t dnode_hold_free_lock_misses; + wmsum_t dnode_hold_free_lock_retry; + wmsum_t dnode_hold_free_refcount; + wmsum_t dnode_hold_free_overflow; + wmsum_t dnode_free_interior_lock_retry; + wmsum_t dnode_allocate; + wmsum_t dnode_reallocate; + wmsum_t dnode_buf_evict; + wmsum_t dnode_alloc_next_chunk; + wmsum_t dnode_alloc_race; + wmsum_t dnode_alloc_next_block; + wmsum_t dnode_move_invalid; + wmsum_t dnode_move_recheck1; + wmsum_t dnode_move_recheck2; + wmsum_t dnode_move_special; + wmsum_t dnode_move_handle; + wmsum_t dnode_move_rwlock; + wmsum_t dnode_move_active; +} dnode_sums_t; + extern dnode_stats_t dnode_stats; +extern dnode_sums_t dnode_sums; #define DNODE_STAT_INCR(stat, val) \ - atomic_add_64(&dnode_stats.stat.value.ui64, (val)); + wmsum_add(&dnode_sums.stat, (val)) #define DNODE_STAT_BUMP(stat) \ DNODE_STAT_INCR(stat, 1); diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h index f4dc2ab46536..111e70ece151 100644 --- a/sys/contrib/openzfs/include/sys/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fs/zfs.h @@ -1518,6 +1518,7 @@ typedef 
enum { #define ZFS_ONLINE_UNSPARE 0x2 #define ZFS_ONLINE_FORCEFAULT 0x4 #define ZFS_ONLINE_EXPAND 0x8 +#define ZFS_ONLINE_SPARE 0x10 #define ZFS_OFFLINE_TEMPORARY 0x1 /* diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index 67724a68f0e8..fedadab459b7 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -837,7 +837,7 @@ extern kmutex_t spa_namespace_lock; #define SPA_CONFIG_UPDATE_POOL 0 #define SPA_CONFIG_UPDATE_VDEVS 1 -extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t); +extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t); extern void spa_config_load(void); extern nvlist_t *spa_all_configs(uint64_t *); extern void spa_config_set(spa_t *spa, nvlist_t *config); diff --git a/sys/contrib/openzfs/include/sys/vdev.h b/sys/contrib/openzfs/include/sys/vdev.h index f235bfc8cc19..de08bbf16413 100644 --- a/sys/contrib/openzfs/include/sys/vdev.h +++ b/sys/contrib/openzfs/include/sys/vdev.h @@ -147,6 +147,7 @@ extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux); extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *); extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags); +extern int vdev_remove_wanted(spa_t *spa, uint64_t guid); extern void vdev_clear(spa_t *spa, vdev_t *vd); extern boolean_t vdev_is_dead(vdev_t *vd); @@ -189,6 +190,8 @@ typedef enum vdev_config_flag { VDEV_CONFIG_MISSING = 1 << 4 } vdev_config_flag_t; +extern void vdev_post_kobj_evt(vdev_t *vd); +extern void vdev_clear_kobj_evt(vdev_t *vd); extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config); extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vdev_config_flag_t flags); diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h index da846d8504fe..9d4a8062b2d9 100644 --- a/sys/contrib/openzfs/include/sys/vdev_impl.h +++ 
b/sys/contrib/openzfs/include/sys/vdev_impl.h @@ -69,6 +69,7 @@ extern uint32_t zfs_vdev_async_write_max_active; * Virtual device operations */ typedef int vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd); +typedef void vdev_kobj_post_evt_func_t(vdev_t *vd); typedef void vdev_fini_func_t(vdev_t *vd); typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift, uint64_t *pshift); @@ -123,6 +124,7 @@ typedef const struct vdev_ops { vdev_config_generate_func_t *vdev_op_config_generate; vdev_nparity_func_t *vdev_op_nparity; vdev_ndisks_func_t *vdev_op_ndisks; + vdev_kobj_post_evt_func_t *vdev_op_kobj_evt_post; char vdev_op_type[16]; boolean_t vdev_op_leaf; } vdev_ops_t; @@ -435,6 +437,7 @@ struct vdev { boolean_t vdev_isl2cache; /* was a l2cache device */ boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */ boolean_t vdev_resilver_deferred; /* resilver deferred */ + boolean_t vdev_kobj_flag; /* kobj event record */ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ vdev_cache_t vdev_cache; /* physical block cache */ spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */ diff --git a/sys/contrib/openzfs/include/sys/zap_impl.h b/sys/contrib/openzfs/include/sys/zap_impl.h index 250dde3ce235..3c83448caa2b 100644 --- a/sys/contrib/openzfs/include/sys/zap_impl.h +++ b/sys/contrib/openzfs/include/sys/zap_impl.h @@ -66,10 +66,9 @@ typedef struct mzap_phys { } mzap_phys_t; typedef struct mzap_ent { - avl_node_t mze_node; - int mze_chunkid; - uint64_t mze_hash; - uint32_t mze_cd; /* copy from mze_phys->mze_cd */ + uint32_t mze_hash; + uint16_t mze_cd; /* copy from mze_phys->mze_cd */ + uint16_t mze_chunkid; } mzap_ent_t; #define MZE_PHYS(zap, mze) \ @@ -164,7 +163,7 @@ typedef struct zap { int16_t zap_num_entries; int16_t zap_num_chunks; int16_t zap_alloc_next; - avl_tree_t zap_avl; + zfs_btree_t zap_tree; } zap_micro; } zap_u; } zap_t; @@ -202,7 +201,7 @@ int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp); void zap_unlockdir(zap_t *zap, void *tag); void zap_evict_sync(void *dbu); -zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt); +zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt); void zap_name_free(zap_name_t *zn); int zap_hashbits(zap_t *zap); uint32_t zap_maxcd(zap_t *zap); diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h index a6ff94317195..235a73d5d782 100644 --- a/sys/contrib/openzfs/include/sys/zfs_context.h +++ b/sys/contrib/openzfs/include/sys/zfs_context.h @@ -51,6 +51,7 @@ extern "C" { #include <sys/kmem.h> #include <sys/kmem_cache.h> #include <sys/vmem.h> +#include <sys/misc.h> #include <sys/taskq.h> #include <sys/param.h> #include <sys/disp.h> diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h index ca32cb49c049..0df8a0e4b19a 100644 --- a/sys/contrib/openzfs/include/sys/zfs_znode.h +++ b/sys/contrib/openzfs/include/sys/zfs_znode.h @@ -217,9 +217,9 @@ typedef struct znode { typedef struct znode_hold { uint64_t zh_obj; /* object id */ - kmutex_t zh_lock; /* lock serializing object access */ avl_node_t zh_node; /* avl tree linkage */ - zfs_refcount_t zh_refcount; /* active consumer reference count */ + kmutex_t zh_lock; /* lock serializing object access */ + int zh_refcount; /* active consumer reference count */ } znode_hold_t; static inline uint64_t diff --git a/sys/contrib/openzfs/include/sys/zrlock.h b/sys/contrib/openzfs/include/sys/zrlock.h index b6eba1a18ff4..c8c656dc5518 100644 --- a/sys/contrib/openzfs/include/sys/zrlock.h +++ b/sys/contrib/openzfs/include/sys/zrlock.h @@ -34,9 +34,8 @@ extern "C" { typedef struct zrlock { kmutex_t zr_mtx; - volatile int32_t zr_refcount; kcondvar_t zr_cv; - uint16_t zr_pad; + volatile int32_t zr_refcount; #ifdef ZFS_DEBUG kthread_t *zr_owner; const char *zr_caller; diff --git 
a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 14e03ee28ffe..13ce19df9b34 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -99,6 +99,7 @@ <elf-symbol name='sa_validate_shareopts' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='snapshot_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='unshare_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='use_color' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zcmd_alloc_dst_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zcmd_expand_dst_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zcmd_free_nvlists' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -378,6 +379,7 @@ <elf-symbol name='zpool_vdev_path_to_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol 
name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -5069,6 +5071,9 @@ <parameter type-id='9cf59a50'/> <return type-id='48b5725f'/> </function-decl> + <function-decl name='use_color' mangled-name='use_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='use_color'> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='mkdirp' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='80f4b756'/> <parameter type-id='d50d396c'/> @@ -5858,6 +5863,11 @@ <parameter type-id='c19b74c3' name='istmp'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='zpool_vdev_remove_wanted' mangled-name='zpool_vdev_remove_wanted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_remove_wanted'> + <parameter type-id='4c81de99' name='zhp'/> + <parameter type-id='80f4b756' name='path'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zpool_vdev_fault' mangled-name='zpool_vdev_fault' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_fault'> <parameter type-id='4c81de99' name='zhp'/> <parameter type-id='9c313c2d' name='guid'/> @@ -6098,7 +6108,7 @@ <array-type-def dimensions='1' type-id='b96825af' size-in-bits='64' id='13339fda'> <subrange length='8' type-id='7359adad' id='56e0c0b1'/> </array-type-def> - <class-decl name='sendflags' size-in-bits='544' is-struct='yes' visibility='default' id='f6aa15be'> + <class-decl name='sendflags' size-in-bits='576' is-struct='yes' visibility='default' id='f6aa15be'> <data-member access='public' layout-offset-in-bits='0'> <var-decl name='verbosity' type-id='95e97e5e' visibility='default'/> </data-member> @@ -6130,24 +6140,27 @@ <var-decl name='progress' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='320'> - <var-decl name='largeblock' type-id='c19b74c3' 
visibility='default'/> + <var-decl name='progressastitle' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='352'> - <var-decl name='embed_data' type-id='c19b74c3' visibility='default'/> + <var-decl name='largeblock' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='384'> - <var-decl name='compress' type-id='c19b74c3' visibility='default'/> + <var-decl name='embed_data' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='416'> - <var-decl name='raw' type-id='c19b74c3' visibility='default'/> + <var-decl name='compress' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='448'> - <var-decl name='backup' type-id='c19b74c3' visibility='default'/> + <var-decl name='raw' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='480'> - <var-decl name='holds' type-id='c19b74c3' visibility='default'/> + <var-decl name='backup' type-id='c19b74c3' visibility='default'/> </data-member> <data-member access='public' layout-offset-in-bits='512'> + <var-decl name='holds' type-id='c19b74c3' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='544'> <var-decl name='saved' type-id='c19b74c3' visibility='default'/> </data-member> </class-decl> @@ -6728,6 +6741,11 @@ <parameter type-id='95e97e5e'/> <return type-id='48b5725f'/> </function-decl> + <function-decl name='zfs_setproctitle' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='80f4b756'/> + <parameter is-variadic='yes'/> + <return type-id='48b5725f'/> + </function-decl> <function-decl name='avl_insert' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='a3681dea'/> <parameter type-id='eaa32e2f'/> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c 
b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c index b9698bb22b1f..cf625949fe32 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c @@ -56,10 +56,10 @@ #define ZDIFF_REMOVED '-' #define ZDIFF_RENAMED "R" -#define ZDIFF_ADDED_COLOR ANSI_GREEN +#define ZDIFF_ADDED_COLOR ANSI_GREEN #define ZDIFF_MODIFIED_COLOR ANSI_YELLOW -#define ZDIFF_REMOVED_COLOR ANSI_RED -#define ZDIFF_RENAMED_COLOR ANSI_BLUE +#define ZDIFF_REMOVED_COLOR ANSI_RED +#define ZDIFF_RENAMED_COLOR ANSI_BOLD_BLUE /* * Given a {dsname, object id}, get the object path diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c index e43ebb15c608..29f077841da0 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c @@ -3051,7 +3051,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - if (avail_spare) + if (!(flags & ZFS_ONLINE_SPARE) && avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if ((flags & ZFS_ONLINE_EXPAND || @@ -3159,6 +3159,40 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) } /* + * Remove the specified vdev asynchronously from the configuration, so + * that it may come ONLINE if reinserted. This is called from zed on + * Udev remove event. + * Note: We also have a similar function zpool_vdev_remove() that + * removes the vdev from the pool. 
+ */ +int +zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path) +{ + zfs_cmd_t zc = {"\0"}; + char errbuf[1024]; + nvlist_t *tgt; + boolean_t avail_spare, l2cache; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot remove %s"), path); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, + NULL)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, errbuf)); + + zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); + + zc.zc_cookie = VDEV_STATE_REMOVED; + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, errbuf)); +} + +/* * Mark the given vdev faulted. */ int diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index 6a53571e3a87..8f496b20b89f 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -83,6 +83,9 @@ typedef struct progress_arg { boolean_t pa_parsable; boolean_t pa_estimate; int pa_verbosity; + boolean_t pa_astitle; + boolean_t pa_progress; + uint64_t pa_size; } progress_arg_t; static int @@ -712,6 +715,7 @@ typedef struct send_dump_data { boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t dryrun, parsable, progress, embed_data, std_out; boolean_t large_block, compress, raw, holds; + boolean_t progressastitle; int outfd; boolean_t err; nvlist_t *fss; @@ -904,6 +908,7 @@ send_progress_thread(void *arg) zfs_handle_t *zhp = pa->pa_zhp; uint64_t bytes; uint64_t blocks; + uint64_t total = pa->pa_size / 100; char buf[16]; time_t t; struct tm *tm; @@ -922,7 +927,7 @@ send_progress_thread(void *arg) return ((void *)(uintptr_t)err); } - if (firstloop && !pa->pa_parsable) { + if (firstloop && !pa->pa_parsable && pa->pa_progress) { (void) fprintf(stderr, "TIME %s %sSNAPSHOT %s\n", pa->pa_estimate ? 
"BYTES" : " SENT", @@ -934,6 +939,17 @@ send_progress_thread(void *arg) (void) time(&t); tm = localtime(&t); + if (pa->pa_astitle) { + char buf_bytes[16]; + char buf_size[16]; + int pct; + zfs_nicenum(bytes, buf_bytes, sizeof (buf_bytes)); + zfs_nicenum(pa->pa_size, buf_size, sizeof (buf_size)); + pct = (total > 0) ? bytes / total : 100; + zfs_setproctitle("sending %s (%d%%: %s/%s)", + zhp->zfs_name, MIN(pct, 100), buf_bytes, buf_size); + } + if (pa->pa_verbosity >= 2 && pa->pa_parsable) { (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%llu\t%s\n", @@ -950,7 +966,7 @@ send_progress_thread(void *arg) (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, (u_longlong_t)bytes, zhp->zfs_name); - } else { + } else if (pa->pa_progress) { zfs_nicebytes(bytes, buf, sizeof (buf)); (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, @@ -1114,12 +1130,15 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. 
*/ - if (sdd->progress) { + if (sdd->progress || sdd->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = sdd->outfd; pa.pa_parsable = sdd->parsable; pa.pa_estimate = B_FALSE; pa.pa_verbosity = sdd->verbosity; + pa.pa_size = sdd->size; + pa.pa_astitle = sdd->progressastitle; + pa.pa_progress = sdd->progress; if ((err = pthread_create(&tid, NULL, send_progress_thread, &pa)) != 0) { @@ -1131,7 +1150,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, fromorigin, sdd->outfd, flags, sdd->debugnv); - if (sdd->progress) { + if (sdd->progress || sdd->progressastitle) { void *status = NULL; (void) pthread_cancel(tid); (void) pthread_join(tid, &status); @@ -1462,7 +1481,7 @@ lzc_flags_from_sendflags(const sendflags_t *flags) static int estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes, - const char *redactbook, char *errbuf) + const char *redactbook, char *errbuf, uint64_t *sizep) { uint64_t size; FILE *fout = flags->dryrun ? 
stdout : stderr; @@ -1470,7 +1489,7 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, int err = 0; pthread_t ptid; - if (flags->progress) { + if (flags->progress || flags->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = fd; pa.pa_parsable = flags->parsable; @@ -1489,8 +1508,9 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, err = lzc_send_space_resume_redacted(zhp->zfs_name, from, lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes, redactbook, fd, &size); + *sizep = size; - if (flags->progress) { + if (flags->progress || flags->progressastitle) { void *status = NULL; (void) pthread_cancel(ptid); (void) pthread_join(ptid, &status); @@ -1505,6 +1525,9 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, } } + if (!flags->progress && !flags->parsable) + return (err); + if (err != 0) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err)); return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, @@ -1638,6 +1661,7 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, uint64_t *redact_snap_guids = NULL; int num_redact_snaps = 0; char *redact_book = NULL; + uint64_t size = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot resume send")); @@ -1731,7 +1755,7 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, } } - if (flags->verbosity != 0) { + if (flags->verbosity != 0 || flags->progressastitle) { /* * Some of these may have come from the resume token, set them * here for size estimate purposes. 
@@ -1748,7 +1772,7 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, if (lzc_flags & LZC_SEND_FLAG_SAVED) tmpflags.saved = B_TRUE; error = estimate_size(zhp, fromname, outfd, &tmpflags, - resumeobj, resumeoff, bytes, redact_book, errbuf); + resumeobj, resumeoff, bytes, redact_book, errbuf, &size); } if (!flags->dryrun) { @@ -1758,12 +1782,15 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. */ - if (flags->progress) { + if (flags->progress || flags->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = outfd; pa.pa_parsable = flags->parsable; pa.pa_estimate = B_FALSE; pa.pa_verbosity = flags->verbosity; + pa.pa_size = size; + pa.pa_astitle = flags->progressastitle; + pa.pa_progress = flags->progress; error = pthread_create(&tid, NULL, send_progress_thread, &pa); @@ -1780,7 +1807,7 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, if (redact_book != NULL) free(redact_book); - if (flags->progress) { + if (flags->progress || flags->progressastitle) { void *status = NULL; (void) pthread_cancel(tid); (void) pthread_join(tid, &status); @@ -1790,6 +1817,7 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "progress thread exited nonzero")); + zfs_close(zhp); return (zfs_standard_error(hdl, error, errbuf)); } } @@ -2199,6 +2227,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.verbosity = flags->verbosity; sdd.parsable = flags->parsable; sdd.progress = flags->progress; + sdd.progressastitle = flags->progressastitle; sdd.dryrun = flags->dryrun; sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; @@ -2410,6 +2439,7 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, char *name = zhp->zfs_name; pthread_t ptid; progress_arg_t pa = { 0 
}; + uint64_t size = 0; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -2492,9 +2522,9 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, /* * Perform size estimate if verbose was specified. */ - if (flags->verbosity != 0) { + if (flags->verbosity != 0 || flags->progressastitle) { err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook, - errbuf); + errbuf, &size); if (err != 0) return (err); } @@ -2506,12 +2536,15 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, * If progress reporting is requested, spawn a new thread to poll * ZFS_IOC_SEND_PROGRESS at a regular interval. */ - if (flags->progress) { + if (flags->progress || flags->progressastitle) { pa.pa_zhp = zhp; pa.pa_fd = fd; pa.pa_parsable = flags->parsable; pa.pa_estimate = B_FALSE; pa.pa_verbosity = flags->verbosity; + pa.pa_size = size; + pa.pa_astitle = flags->progressastitle; + pa.pa_progress = flags->progress; err = pthread_create(&ptid, NULL, send_progress_thread, &pa); @@ -2525,7 +2558,7 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, err = lzc_send_redacted(name, from, fd, lzc_flags_from_sendflags(flags), redactbook); - if (flags->progress) { + if (flags->progress || flags->progressastitle) { void *status = NULL; (void) pthread_cancel(ptid); (void) pthread_join(ptid, &status); diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c index 8eb7582ba09a..7c4d310782eb 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c @@ -2027,7 +2027,7 @@ zfs_version_print(void) * Return 1 if the user requested ANSI color output, and our terminal supports * it. Return 0 for no color. 
*/ -static int +int use_color(void) { static int use_color = -1; @@ -2073,10 +2073,11 @@ use_color(void) } /* - * color_start() and color_end() are used for when you want to colorize a block - * of text. For example: + * The functions color_start() and color_end() are used for when you want + * to colorize a block of text. * - * color_start(ANSI_RED_FG) + * For example: + * color_start(ANSI_RED) * printf("hello"); * printf("world"); * color_end(); @@ -2084,7 +2085,7 @@ use_color(void) void color_start(const char *color) { - if (use_color()) { + if (color && use_color()) { fputs(color, stdout); fflush(stdout); } @@ -2099,7 +2100,9 @@ color_end(void) } } -/* printf() with a color. If color is NULL, then do a normal printf. */ +/* + * printf() with a color. If color is NULL, then do a normal printf. + */ int printf_color(const char *color, char *format, ...) { diff --git a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi index c15cb3afbfca..1b03a5c42ef4 100644 --- a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi +++ b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi @@ -272,6 +272,8 @@ <elf-symbol name='zfs_niceraw' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_nicetime' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_resolve_shortname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zfs_setproctitle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zfs_setproctitle_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_strcmp_pathname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_strip_partition' 
type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_strip_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -3340,6 +3342,30 @@ <return type-id='95e97e5e'/> </function-decl> </abi-instr> + <abi-instr address-size='64' path='os/linux/zutil_setproctitle.c' language='LANG_C99'> + <function-decl name='warnx' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='80f4b756'/> + <parameter is-variadic='yes'/> + <return type-id='48b5725f'/> + </function-decl> + <function-decl name='setenv' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='80f4b756'/> + <parameter type-id='80f4b756'/> + <parameter type-id='95e97e5e'/> + <return type-id='95e97e5e'/> + </function-decl> + <function-decl name='zfs_setproctitle_init' mangled-name='zfs_setproctitle_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle_init'> + <parameter type-id='95e97e5e' name='argc'/> + <parameter type-id='9b23c9ad' name='argv'/> + <parameter type-id='9b23c9ad' name='envp'/> + <return type-id='48b5725f'/> + </function-decl> + <function-decl name='zfs_setproctitle' mangled-name='zfs_setproctitle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle'> + <parameter type-id='80f4b756' name='fmt'/> + <parameter is-variadic='yes'/> + <return type-id='48b5725f'/> + </function-decl> + </abi-instr> <abi-instr address-size='64' path='zutil_device_path.c' language='LANG_C99'> <qualified-type-def type-id='26a90f95' restrict='yes' id='266fe297'/> <qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/> diff --git a/sys/contrib/openzfs/lib/libzutil/Makefile.am b/sys/contrib/openzfs/lib/libzutil/Makefile.am index f55b7798f1c0..0ddc241d6df9 100644 --- a/sys/contrib/openzfs/lib/libzutil/Makefile.am +++ b/sys/contrib/openzfs/lib/libzutil/Makefile.am @@ -18,6 +18,7 @@ USER_C = \ if 
BUILD_LINUX USER_C += \ + os/linux/zutil_setproctitle.c \ os/linux/zutil_device_path_os.c \ os/linux/zutil_import_os.c \ os/linux/zutil_compat.c diff --git a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_setproctitle.c b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_setproctitle.c new file mode 100644 index 000000000000..4a6d12cf70cf --- /dev/null +++ b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_setproctitle.c @@ -0,0 +1,299 @@ +/* + * Copyright © 2013 Guillem Jover <guillem@hadrons.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <errno.h> +#include <stddef.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <err.h> +#include <unistd.h> +#include <string.h> +#include <sys/param.h> +#include <libzutil.h> + +static struct { + /* Original value. */ + const char *arg0; + + /* Title space available. */ + char *base, *end; + + /* Pointer to original nul character within base. */ + char *nul; + + boolean_t warned; + boolean_t reset; + int error; +} SPT; + +#define LIBBSD_IS_PATHNAME_SEPARATOR(c) ((c) == '/') +#define SPT_MAXTITLE 255 + +extern const char *__progname; + +static const char * +getprogname(void) +{ + return (__progname); +} + +static void +setprogname(const char *progname) +{ + size_t i; + + for (i = strlen(progname); i > 0; i--) { + if (LIBBSD_IS_PATHNAME_SEPARATOR(progname[i - 1])) { + __progname = progname + i; + return; + } + } + __progname = progname; +} + + +static inline size_t +spt_min(size_t a, size_t b) +{ + return ((a < b) ? a : b); +} + +/* + * For discussion on the portability of the various methods, see + * https://lists.freebsd.org/pipermail/freebsd-stable/2008-June/043136.html + */ +static int +spt_clearenv(void) +{ + char **tmp; + + tmp = malloc(sizeof (*tmp)); + if (tmp == NULL) + return (errno); + + tmp[0] = NULL; + environ = tmp; + + return (0); +} + +static int +spt_copyenv(int envc, char *envp[]) +{ + char **envcopy; + char *eq; + int envsize; + int i, error; + + if (environ != envp) + return (0); + + /* + * Make a copy of the old environ array of pointers, in case + * clearenv() or setenv() is implemented to free the internal + * environ array, because we will need to access the old environ + * contents to make the new copy. 
+ */ + envsize = (envc + 1) * sizeof (char *); + envcopy = malloc(envsize); + if (envcopy == NULL) + return (errno); + memcpy(envcopy, envp, envsize); + + error = spt_clearenv(); + if (error) { + environ = envp; + free(envcopy); + return (error); + } + + for (i = 0; envcopy[i]; i++) { + eq = strchr(envcopy[i], '='); + if (eq == NULL) + continue; + + *eq = '\0'; + if (setenv(envcopy[i], eq + 1, 1) < 0) + error = errno; + *eq = '='; + + if (error) { + environ = envp; + free(envcopy); + return (error); + } + } + + /* + * Dispose of the shallow copy, now that we've finished transferring + * the old environment. + */ + free(envcopy); + + return (0); +} + +static int +spt_copyargs(int argc, char *argv[]) +{ + char *tmp; + int i; + + for (i = 1; i < argc || (i >= argc && argv[i]); i++) { + if (argv[i] == NULL) + continue; + + tmp = strdup(argv[i]); + if (tmp == NULL) + return (errno); + + argv[i] = tmp; + } + + return (0); +} + +void +zfs_setproctitle_init(int argc, char *argv[], char *envp[]) +{ + char *base, *end, *nul, *tmp; + int i, envc, error; + + /* Try to make sure we got called with main() arguments. 
*/ + if (argc < 0) + return; + + base = argv[0]; + if (base == NULL) + return; + + nul = base + strlen(base); + end = nul + 1; + + for (i = 0; i < argc || (i >= argc && argv[i]); i++) { + if (argv[i] == NULL || argv[i] != end) + continue; + + end = argv[i] + strlen(argv[i]) + 1; + } + + for (i = 0; envp[i]; i++) { + if (envp[i] != end) + continue; + + end = envp[i] + strlen(envp[i]) + 1; + } + envc = i; + + SPT.arg0 = strdup(argv[0]); + if (SPT.arg0 == NULL) { + SPT.error = errno; + return; + } + + tmp = strdup(getprogname()); + if (tmp == NULL) { + SPT.error = errno; + return; + } + setprogname(tmp); + + error = spt_copyenv(envc, envp); + if (error) { + SPT.error = error; + return; + } + + error = spt_copyargs(argc, argv); + if (error) { + SPT.error = error; + return; + } + + SPT.nul = nul; + SPT.base = base; + SPT.end = end; +} + +void +zfs_setproctitle(const char *fmt, ...) +{ + /* Use buffer in case argv[0] is passed. */ + char buf[SPT_MAXTITLE + 1]; + va_list ap; + char *nul; + int len; + if (SPT.base == NULL) { + if (!SPT.warned) { + warnx("setproctitle not initialized, please " + "call zfs_setproctitle_init()"); + SPT.warned = B_TRUE; + } + return; + } + + if (fmt) { + if (fmt[0] == '-') { + /* Skip program name prefix. */ + fmt++; + len = 0; + } else { + /* Print program name heading for grep. 
*/ + snprintf(buf, sizeof (buf), "%s: ", getprogname()); + len = strlen(buf); + } + + va_start(ap, fmt); + len += vsnprintf(buf + len, sizeof (buf) - len, fmt, ap); + va_end(ap); + } else { + len = snprintf(buf, sizeof (buf), "%s", SPT.arg0); + } + + if (len <= 0) { + SPT.error = errno; + return; + } + + if (!SPT.reset) { + memset(SPT.base, 0, SPT.end - SPT.base); + SPT.reset = B_TRUE; + } else { + memset(SPT.base, 0, spt_min(sizeof (buf), SPT.end - SPT.base)); + } + + len = spt_min(len, spt_min(sizeof (buf), SPT.end - SPT.base) - 1); + memcpy(SPT.base, buf, len); + nul = SPT.base + len; + + if (nul < SPT.nul) { + *SPT.nul = '.'; + } else if (nul == SPT.nul && nul + 1 < SPT.end) { + *SPT.nul = ' '; + *++nul = '\0'; + } +} diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index ed8914276376..71a95c3bd812 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -15,7 +15,7 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd June 1, 2021 +.Dd January 10, 2023 .Dt ZFS 4 .Os . @@ -233,6 +233,12 @@ relative to the pool. Make some blocks above a certain size be gang blocks. This option is used by the test suite to facilitate testing. . +.It Sy zfs_default_bs Ns = Ns Sy 9 Po 512 B Pc Pq int +Default dnode block size as a power of 2. +. +.It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int +Default dnode indirect block size as a power of 2. +. .It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Ns B Po 1MB Pc Pq int When attempting to log an output nvlist of an ioctl in the on-disk history, the output will not be stored if it is larger than this size (in bytes). @@ -2126,6 +2132,13 @@ On very fragmented pools, lowering this .Pq typically to Sy 36kB can improve performance. . +.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64 +This sets the minimum delay in nanoseconds the ZIL cares to delay a block commit, +waiting for more records. 
+If ZIL writes are too fast, kernel may not be able sleep for so short interval, +increasing log latency above allowed by +.Sy zfs_commit_timeout_pct . +. .It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int Disable the cache flush commands that are normally sent to disk by the ZIL after an LWB write has completed. diff --git a/sys/contrib/openzfs/man/man7/dracut.zfs.7 b/sys/contrib/openzfs/man/man7/dracut.zfs.7 index 0f446fe2fe3f..d9234bdf5649 100644 --- a/sys/contrib/openzfs/man/man7/dracut.zfs.7 +++ b/sys/contrib/openzfs/man/man7/dracut.zfs.7 @@ -1,6 +1,6 @@ .\" SPDX-License-Identifier: 0BSD .\" -.Dd April 4, 2022 +.Dd March 28, 2023 .Dt DRACUT.ZFS 7 .Os . @@ -28,13 +28,13 @@ zfs-import-scan.service \(da \(da | zfs-import-c zfs-import.target \(-> dracut-pre-mount.service | \(ua | | dracut-zfs-generator | - | ____________________/| + | _____________________/| |/ \(da - | sysroot.mount \(<-\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em dracut-zfs-generator - | | \(da | - | \(da sysroot-{usr,etc,lib,&c.}.mount | - | initrd-root-fs.target \(<-\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em or \(da - | | zfs-nonroot-necessities.service + | sysroot.mount \(<-\(em\(em\(em dracut-zfs-generator + | | + | \(da + | initrd-root-fs.target \(<-\(em zfs-nonroot-necessities.service + | | | | \(da | \(da dracut-mount.service | zfs-snapshot-bootfs.service | | @@ -42,7 +42,7 @@ zfs-import-scan.service \(da \(da | zfs-import-c \(da … | zfs-rollback-bootfs.service | | | \(da | - | sysroot-usr.mount \(<-\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em/ + | /sysroot/{usr,etc,lib,&c.} \(<-\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em/ | | | \(da | initrd-fs.target diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8 index 688bd033979a..3280a1e3613c 100644 --- a/sys/contrib/openzfs/man/man8/zfs-send.8 +++ b/sys/contrib/openzfs/man/man8/zfs-send.8 @@ -29,7 
+29,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd April 15, 2021 +.Dd January 12, 2023 .Dt ZFS-SEND 8 .Os . @@ -39,28 +39,28 @@ .Sh SYNOPSIS .Nm zfs .Cm send -.Op Fl DLPRbcehnpsvw +.Op Fl DLPVRbcehnpsvw .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Nm zfs .Cm send -.Op Fl DLPcensvw +.Op Fl DLPVcensvw .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm zfs .Cm send .Fl -redact Ar redaction_bookmark -.Op Fl DLPcenpv +.Op Fl DLPVcenpv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar snapshot .Nm zfs .Cm send -.Op Fl Penv +.Op Fl PVenv .Fl t .Ar receive_resume_token .Nm zfs .Cm send -.Op Fl Pnv +.Op Fl PVnv .Fl S Ar filesystem .Nm zfs .Cm redact @@ -72,7 +72,7 @@ .It Xo .Nm zfs .Cm send -.Op Fl DLPRbcehnpvw +.Op Fl DLPVRbcehnpvw .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Xc @@ -140,6 +140,8 @@ If the flag is used to send encrypted datasets, then .Fl w must also be specified. +.It Fl V , -proctitle +Set the process title to a per-second report of how much data has been sent. .It Fl e , -embed Generate a more compact stream by using .Sy WRITE_EMBEDDED @@ -285,7 +287,7 @@ You will be able to receive your streams on future versions of ZFS. .It Xo .Nm zfs .Cm send -.Op Fl DLPcenvw +.Op Fl DLPVcenvw .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc @@ -417,7 +419,7 @@ This information includes a per-second report of how much data has been sent. .Nm zfs .Cm send .Fl -redact Ar redaction_bookmark -.Op Fl DLPcenpv +.Op Fl DLPVcenpv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar snapshot .Xc @@ -511,7 +513,7 @@ raw sends and redacted sends cannot be combined at this time. .It Xo .Nm zfs .Cm send -.Op Fl Penv +.Op Fl PVenv .Fl t .Ar receive_resume_token .Xc @@ -526,7 +528,7 @@ for more details. 
.It Xo .Nm zfs .Cm send -.Op Fl Pnv +.Op Fl PVnv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Fl S .Ar filesystem diff --git a/sys/contrib/openzfs/man/man8/zfs.8 b/sys/contrib/openzfs/man/man8/zfs.8 index 2fc2f3166a75..23220b7f3ee6 100644 --- a/sys/contrib/openzfs/man/man8/zfs.8 +++ b/sys/contrib/openzfs/man/man8/zfs.8 @@ -708,6 +708,8 @@ command will be undone if the share is ever unshared (like via a reboot). .It Sy ZFS_COLOR Use ANSI color in .Nm zfs Cm diff +and +.Nm zfs Cm list output. .El .Bl -tag -width "ZFS_MOUNT_HELPER" diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8 index 192a8e2eac8d..e5d7c8515177 100644 --- a/sys/contrib/openzfs/man/man8/zpool.8 +++ b/sys/contrib/openzfs/man/man8/zpool.8 @@ -433,6 +433,8 @@ to dump core on exit for the purposes of running .It Sy ZFS_COLOR Use ANSI color in .Nm zpool status +and +.Nm zpool iostat output. .It Sy ZPOOL_IMPORT_PATH The search path for devices or files to use with the pool. diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c index 590d1c04b9a5..3dd49f05521b 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c @@ -160,8 +160,6 @@ arc_prune_task(void *arg) { int64_t nr_scan = (intptr_t)arg; - arc_reduce_target_size(ptob(nr_scan)); - #ifndef __ILP32__ if (nr_scan > INT_MAX) nr_scan = INT_MAX; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 5315b60982df..b5db3f83eb07 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -356,14 +356,6 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN, /* dsl_pool.c */ /* dnode.c */ -extern int zfs_default_bs; -SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN, - &zfs_default_bs, 0, "Default dnode block shift"); - -extern int 
zfs_default_ibs; -SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN, - &zfs_default_ibs, 0, "Default dnode indirect block shift"); - /* dsl_scan.c */ diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 09c35b371920..ec80bd7994b7 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -1161,7 +1161,10 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, hole = (cmd == FIOSEEKHOLE); noff = *off; + lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, + RL_READER); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); + zfs_rangelock_exit(lr); *off = noff; break; } diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c index 5ea4fc635165..508fb9d4c7f7 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c @@ -48,6 +48,7 @@ #include <linux/mod_compat.h> #include <sys/cred.h> #include <sys/vnode.h> +#include <sys/misc.h> char spl_gitrev[64] = ZFS_META_GITREV; @@ -540,6 +541,38 @@ ddi_copyin(const void *from, void *to, size_t len, int flags) } EXPORT_SYMBOL(ddi_copyin); +/* + * Post a uevent to userspace whenever a new vdev adds to the pool. It is + * necessary to sync blkid information with udev, which zed daemon uses + * during device hotplug to identify the vdev. 
+ */ +void +spl_signal_kobj_evt(struct block_device *bdev) +{ +#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV) +#ifdef HAVE_BDEV_KOBJ + struct kobject *disk_kobj = bdev_kobj(bdev); +#else + struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj; +#endif + if (disk_kobj) { + int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE); + if (ret) { + pr_warn("ZFS: Sending event '%d' to kobject: '%s'" + " (%p): failed(ret:%d)\n", KOBJ_CHANGE, + kobject_name(disk_kobj), disk_kobj, ret); + } + } +#else +/* + * This is encountered if neither bdev_kobj() nor part_to_dev() is available + * in the kernel - likely due to an API change that needs to be chased down. + */ +#error "Unsupported kernel: unable to get struct kobj from bdev" +#endif +} +EXPORT_SYMBOL(spl_signal_kobj_evt); + int ddi_copyout(const void *from, void *to, size_t len, int flags) { diff --git a/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c b/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c index 1d099c95bc7c..64e19e03747f 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c @@ -247,7 +247,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, Cpa8U *buffer_meta_src = NULL; Cpa8U *buffer_meta_dst = NULL; Cpa32U buffer_meta_size = 0; - CpaDcRqResults dc_results; + CpaDcRqResults dc_results = {.checksum = 1}; CpaStatus status = CPA_STATUS_FAIL; Cpa32U hdr_sz = 0; Cpa32U compressed_sz; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 2f84792d89be..60b111c59f23 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -179,6 +179,18 @@ vdev_disk_error(zio_t *zio) zio->io_flags); } +static void +vdev_disk_kobj_evt_post(vdev_t *v) +{ + vdev_disk_t *vd = v->vdev_tsd; + if (vd && vd->vd_bdev) { + spl_signal_kobj_evt(vd->vd_bdev); + } else { + vdev_dbgmsg(v, 
"vdev_disk_t is NULL for VDEV:%s\n", + v->vdev_path); + } +} + static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *logical_ashift, uint64_t *physical_ashift) @@ -290,6 +302,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL, zfs_vdev_holder); if (unlikely(PTR_ERR(bdev) == -ENOENT)) { + /* + * There is no point of waiting since device is removed + * explicitly + */ + if (v->vdev_removed) + break; + schedule_timeout(MSEC_TO_TICK(10)); } else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) { timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10); @@ -899,7 +918,7 @@ vdev_disk_io_done(zio_t *zio) vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; - if (zfs_check_media_change(vd->vd_bdev)) { + if (!zfs_check_disk_status(vd->vd_bdev)) { invalidate_bdev(vd->vd_bdev); v->vdev_remove_wanted = B_TRUE; spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); @@ -955,7 +974,8 @@ vdev_ops_t vdev_disk_ops = { .vdev_op_nparity = NULL, .vdev_op_ndisks = NULL, .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ - .vdev_op_leaf = B_TRUE /* leaf vdev */ + .vdev_op_leaf = B_TRUE, /* leaf vdev */ + .vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post }; /* diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c index ba2375387104..f3475b4d9794 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c @@ -162,8 +162,7 @@ zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags) znode_hold_t *zh = buf; mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL); - zfs_refcount_create(&zh->zh_refcount); - zh->zh_obj = ZFS_NO_OBJECT; + zh->zh_refcount = 0; return (0); } @@ -174,7 +173,6 @@ zfs_znode_hold_cache_destructor(void *buf, void *arg) znode_hold_t *zh = buf; mutex_destroy(&zh->zh_lock); - zfs_refcount_destroy(&zh->zh_refcount); } void @@ -273,26 
+271,26 @@ zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj) boolean_t found = B_FALSE; zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP); - zh_new->zh_obj = obj; search.zh_obj = obj; mutex_enter(&zfsvfs->z_hold_locks[i]); zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); if (likely(zh == NULL)) { zh = zh_new; + zh->zh_obj = obj; avl_add(&zfsvfs->z_hold_trees[i], zh); } else { ASSERT3U(zh->zh_obj, ==, obj); found = B_TRUE; } - zfs_refcount_add(&zh->zh_refcount, NULL); + zh->zh_refcount++; + ASSERT3S(zh->zh_refcount, >, 0); mutex_exit(&zfsvfs->z_hold_locks[i]); if (found == B_TRUE) kmem_cache_free(znode_hold_cache, zh_new); ASSERT(MUTEX_NOT_HELD(&zh->zh_lock)); - ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0); mutex_enter(&zh->zh_lock); return (zh); @@ -305,11 +303,11 @@ zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh) boolean_t remove = B_FALSE; ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj)); - ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0); mutex_exit(&zh->zh_lock); mutex_enter(&zfsvfs->z_hold_locks[i]); - if (zfs_refcount_remove(&zh->zh_refcount, NULL) == 0) { + ASSERT3S(zh->zh_refcount, >, 0); + if (--zh->zh_refcount == 0) { avl_remove(&zfsvfs->z_hold_trees[i], zh); remove = B_TRUE; } diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index fe66fd83d1f5..6900b6b134d9 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -5943,6 +5943,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; boolean_t embedded_bp = !!BP_IS_EMBEDDED(bp); boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF; + arc_buf_t *buf = NULL; int rc = 0; ASSERT(!embedded_bp || @@ -5972,7 +5973,7 @@ top: if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER, BLK_VERIFY_LOG)) { rc = SET_ERROR(ECKSUM); - goto out; + goto done; } if (!embedded_bp) { @@ -5992,7 +5993,6 @@ top: */ if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) || 
(hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) { - arc_buf_t *buf = NULL; *arc_flags |= ARC_FLAG_CACHED; if (HDR_IO_IN_PROGRESS(hdr)) { @@ -6002,7 +6002,7 @@ top: mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_cached_only_in_progress); rc = SET_ERROR(ENOENT); - goto out; + goto done; } ASSERT3P(head_zio, !=, NULL); @@ -6130,9 +6130,7 @@ top: ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits); - - if (done) - done(NULL, zb, bp, buf, private); + goto done; } else { uint64_t lsize = BP_GET_LSIZE(bp); uint64_t psize = BP_GET_PSIZE(bp); @@ -6145,10 +6143,10 @@ top: int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0; if (*arc_flags & ARC_FLAG_CACHED_ONLY) { - rc = SET_ERROR(ENOENT); if (hash_lock != NULL) mutex_exit(hash_lock); - goto out; + rc = SET_ERROR(ENOENT); + goto done; } if (hdr == NULL) { @@ -6474,6 +6472,16 @@ out: spa_read_history_add(spa, zb, *arc_flags); spl_fstrans_unmark(cookie); return (rc); + +done: + if (done) + done(NULL, zb, bp, buf, private); + if (pio && rc != 0) { + zio_t *zio = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); + zio->io_error = rc; + zio_nowait(zio); + } + goto out; } arc_prune_t * diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c index 68f534c6b197..a8e9309d284b 100644 --- a/sys/contrib/openzfs/module/zfs/bpobj.c +++ b/sys/contrib/openzfs/module/zfs/bpobj.c @@ -663,14 +663,13 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) } VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); - VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); - if (bpobj_is_empty(&subbpo)) { /* No point in having an empty subobj. 
*/ bpobj_close(&subbpo); bpobj_free(bpo->bpo_os, subobj, tx); return; } + VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); mutex_enter(&bpo->bpo_lock); dmu_buf_will_dirty(bpo->bpo_dbuf, tx); @@ -780,6 +779,68 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) } +/* + * Prefetch metadata required for bpobj_enqueue_subobj(). + */ +void +bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj) +{ + dmu_object_info_t doi; + bpobj_t subbpo; + uint64_t subsubobjs; + boolean_t copy_subsub = B_TRUE; + boolean_t copy_bps = B_TRUE; + + ASSERT(bpobj_is_open(bpo)); + ASSERT(subobj != 0); + + if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) + return; + + if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0) + return; + if (bpobj_is_empty(&subbpo)) { + bpobj_close(&subbpo); + return; + } + subsubobjs = subbpo.bpo_phys->bpo_subobjs; + bpobj_close(&subbpo); + + if (subsubobjs != 0) { + if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0) + return; + if (doi.doi_max_offset > doi.doi_data_block_size) + copy_subsub = B_FALSE; + } + + if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0) + return; + if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub) + copy_bps = B_FALSE; + + if (copy_subsub && subsubobjs != 0) { + if (bpo->bpo_phys->bpo_subobjs) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1, + ZIO_PRIORITY_ASYNC_READ); + } + dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1, + ZIO_PRIORITY_ASYNC_READ); + } + + if (copy_bps) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, + bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1, + ZIO_PRIORITY_ASYNC_READ); + dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1, + ZIO_PRIORITY_ASYNC_READ); + } else if (bpo->bpo_phys->bpo_subobjs) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1, + ZIO_PRIORITY_ASYNC_READ); + } +} + void bpobj_enqueue(bpobj_t *bpo, const blkptr_t 
*bp, boolean_t bp_freed, dmu_tx_t *tx) diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c index e16c4ebef6ba..28ab3fcdcc3c 100644 --- a/sys/contrib/openzfs/module/zfs/btree.c +++ b/sys/contrib/openzfs/module/zfs/btree.c @@ -102,7 +102,7 @@ zfs_btree_poison_node(zfs_btree_t *tree, zfs_btree_hdr_t *hdr) (void) memset(leaf->btl_elems, 0x0f, hdr->bth_first * size); (void) memset(leaf->btl_elems + (hdr->bth_first + hdr->bth_count) * size, 0x0f, - BTREE_LEAF_ESIZE - + tree->bt_leaf_size - offsetof(zfs_btree_leaf_t, btl_elems) - (hdr->bth_first + hdr->bth_count) * size); } #endif @@ -173,16 +173,44 @@ zfs_btree_fini(void) kmem_cache_destroy(zfs_btree_leaf_cache); } +static void * +zfs_btree_leaf_alloc(zfs_btree_t *tree) +{ + if (tree->bt_leaf_size == BTREE_LEAF_SIZE) + return (kmem_cache_alloc(zfs_btree_leaf_cache, KM_SLEEP)); + else + return (kmem_alloc(tree->bt_leaf_size, KM_SLEEP)); +} + +static void +zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr) +{ + if (tree->bt_leaf_size == BTREE_LEAF_SIZE) + return (kmem_cache_free(zfs_btree_leaf_cache, ptr)); + else + return (kmem_free(ptr, tree->bt_leaf_size)); +} + void zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *), size_t size) { - ASSERT3U(size, <=, BTREE_LEAF_ESIZE / 2); + zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE); +} + +void +zfs_btree_create_custom(zfs_btree_t *tree, + int (*compar) (const void *, const void *), + size_t size, size_t lsize) +{ + size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems); - bzero(tree, sizeof (*tree)); + ASSERT3U(size, <=, esize / 2); + memset(tree, 0, sizeof (*tree)); tree->bt_compar = compar; tree->bt_elem_size = size; - tree->bt_leaf_cap = P2ALIGN(BTREE_LEAF_ESIZE / size, 2); + tree->bt_leaf_size = lsize; + tree->bt_leaf_cap = P2ALIGN(esize / size, 2); tree->bt_height = -1; tree->bt_bulk = NULL; } @@ -290,7 +318,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t 
*where) zfs_btree_core_t *node = NULL; uint32_t child = 0; - uint64_t depth = 0; + uint32_t depth = 0; /* * Iterate down the tree, finding which child the value should be in @@ -811,8 +839,7 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, move_count++; } tree->bt_num_nodes++; - zfs_btree_leaf_t *new_leaf = kmem_cache_alloc(zfs_btree_leaf_cache, - KM_SLEEP); + zfs_btree_leaf_t *new_leaf = zfs_btree_leaf_alloc(tree); zfs_btree_hdr_t *new_hdr = &new_leaf->btl_hdr; new_hdr->bth_parent = leaf->btl_hdr.bth_parent; new_hdr->bth_first = (tree->bt_bulk ? 0 : capacity / 4) + @@ -1078,8 +1105,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value, ASSERT0(where->bti_offset); tree->bt_num_nodes++; - zfs_btree_leaf_t *leaf = kmem_cache_alloc(zfs_btree_leaf_cache, - KM_SLEEP); + zfs_btree_leaf_t *leaf = zfs_btree_leaf_alloc(tree); tree->bt_root = &leaf->btl_hdr; tree->bt_height++; @@ -1378,7 +1404,7 @@ zfs_btree_node_destroy(zfs_btree_t *tree, zfs_btree_hdr_t *node) { tree->bt_num_nodes--; if (!zfs_btree_is_core(node)) { - kmem_cache_free(zfs_btree_leaf_cache, node); + zfs_btree_leaf_free(tree, node); } else { kmem_free(node, sizeof (zfs_btree_core_t) + BTREE_CORE_ELEMS * tree->bt_elem_size); @@ -1991,7 +2017,7 @@ zfs_btree_verify_counts(zfs_btree_t *tree) */ static uint64_t zfs_btree_verify_height_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr, - int64_t height) + int32_t height) { if (!zfs_btree_is_core(hdr)) { VERIFY0(height); @@ -2117,8 +2143,10 @@ zfs_btree_verify_poison_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr) zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr; for (size_t i = 0; i < hdr->bth_first * size; i++) VERIFY3U(leaf->btl_elems[i], ==, 0x0f); + size_t esize = tree->bt_leaf_size - + offsetof(zfs_btree_leaf_t, btl_elems); for (size_t i = (hdr->bth_first + hdr->bth_count) * size; - i < BTREE_LEAF_ESIZE; i++) + i < esize; i++) VERIFY3U(leaf->btl_elems[i], ==, 0x0f); } else { zfs_btree_core_t *node = (zfs_btree_core_t *)hdr; diff --git 
a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 53f5775c9e8e..a59aa78f3cc6 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -602,58 +602,58 @@ dbuf_is_metadata(dmu_buf_impl_t *db) boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db) { - vdev_t *vd = NULL; - zfs_cache_type_t cache = db->db_objset->os_secondary_cache; - blkptr_t *bp = db->db_blkptr; - - if (bp != NULL && !BP_IS_HOLE(bp)) { + if (db->db_objset->os_secondary_cache == ZFS_CACHE_ALL || + (db->db_objset->os_secondary_cache == + ZFS_CACHE_METADATA && dbuf_is_metadata(db))) { + if (l2arc_exclude_special == 0) + return (B_TRUE); + + blkptr_t *bp = db->db_blkptr; + if (bp == NULL || BP_IS_HOLE(bp)) + return (B_FALSE); uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev; + vdev_t *vd = NULL; if (vdev < rvd->vdev_children) vd = rvd->vdev_child[vdev]; - if (cache == ZFS_CACHE_ALL || - (dbuf_is_metadata(db) && cache == ZFS_CACHE_METADATA)) { - if (vd == NULL) - return (B_TRUE); + if (vd == NULL) + return (B_TRUE); - if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && - vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) || - l2arc_exclude_special == 0) - return (B_TRUE); - } + if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && + vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) + return (B_TRUE); } - return (B_FALSE); } static inline boolean_t dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level) { - vdev_t *vd = NULL; - zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache; - - if (bp != NULL && !BP_IS_HOLE(bp)) { + if (dn->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || + (dn->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA && + (level > 0 || + DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)))) { + if (l2arc_exclude_special == 0) + return (B_TRUE); + + if (bp == NULL || BP_IS_HOLE(bp)) + return (B_FALSE); uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); vdev_t *rvd = 
dn->dn_objset->os_spa->spa_root_vdev; + vdev_t *vd = NULL; if (vdev < rvd->vdev_children) vd = rvd->vdev_child[vdev]; - if (cache == ZFS_CACHE_ALL || ((level > 0 || - DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)) && - cache == ZFS_CACHE_METADATA)) { - if (vd == NULL) - return (B_TRUE); + if (vd == NULL) + return (B_TRUE); - if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && - vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) || - l2arc_exclude_special == 0) - return (B_TRUE); - } + if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && + vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) + return (B_TRUE); } - return (B_FALSE); } @@ -1483,8 +1483,8 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags) ASSERT(MUTEX_HELD(&db->db_mtx)); - if (!os->os_encrypted || os->os_raw_receive || - (flags & DB_RF_NO_DECRYPT) != 0) + if ((flags & DB_RF_NO_DECRYPT) != 0 || + !os->os_encrypted || os->os_raw_receive) return (0); DB_DNODE_ENTER(db); @@ -1719,8 +1719,6 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) mutex_enter(&db->db_mtx); if (db->db_state == DB_CACHED) { - spa_t *spa = dn->dn_objset->os_spa; - /* * Ensure that this block's dnode has been decrypted if * the caller has requested decrypted data. 
@@ -1739,6 +1737,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) (arc_is_encrypted(db->db_buf) || arc_is_unauthenticated(db->db_buf) || arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { + spa_t *spa = dn->dn_objset->os_spa; zbookmark_phys_t zb; SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), @@ -1755,13 +1754,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); DBUF_STAT_BUMP(hash_hits); } else if (db->db_state == DB_UNCACHED) { - spa_t *spa = dn->dn_objset->os_spa; boolean_t need_wait = B_FALSE; db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); if (zio == NULL && db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) { + spa_t *spa = dn->dn_objset->os_spa; zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); need_wait = B_TRUE; } diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index c67a36470e33..96e98a42ece8 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -2100,18 +2100,18 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) } /* - * This function is only called from zfs_holey_common() for zpl_llseek() - * in order to determine the location of holes. In order to accurately - * report holes all dirty data must be synced to disk. This causes extremely - * poor performance when seeking for holes in a dirty file. As a compromise, - * only provide hole data when the dnode is clean. When a dnode is dirty - * report the dnode as having no holes which is always a safe thing to do. + * Reports the location of data and holes in an object. In order to + * accurately report holes all dirty data must be synced to disk. This + * causes extremely poor performance when seeking for holes in a dirty file. + * As a compromise, only provide hole data when the dnode is clean. When + * a dnode is dirty report the dnode as having no holes by returning EBUSY + * which is always safe to do. 
*/ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int err; + int restarted = 0, err; restart: err = dnode_hold(os, object, FTAG, &dn); @@ -2123,19 +2123,23 @@ restart: if (dnode_is_dirty(dn)) { /* * If the zfs_dmu_offset_next_sync module option is enabled - * then strict hole reporting has been requested. Dirty - * dnodes must be synced to disk to accurately report all - * holes. When disabled dirty dnodes are reported to not - * have any holes which is always safe. + * then hole reporting has been requested. Dirty dnodes + * must be synced to disk to accurately report holes. * - * When called by zfs_holey_common() the zp->z_rangelock - * is held to prevent zfs_write() and mmap writeback from - * re-dirtying the dnode after txg_wait_synced(). + * Provided a RL_READER rangelock spanning 0-UINT64_MAX is + * held by the caller only a single restart will be required. + * We tolerate callers which do not hold the rangelock by + * returning EBUSY and not reporting holes after one restart. 
*/ if (zfs_dmu_offset_next_sync) { rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); + + if (restarted) + return (SET_ERROR(EBUSY)); + txg_wait_synced(dmu_objset_pool(os), 0); + restarted = 1; goto restart; } diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index ed0e8de38e5c..adff615a6f03 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -419,28 +419,28 @@ dnode_multilist_index_func(multilist_t *ml, void *obj) static inline boolean_t dmu_os_is_l2cacheable(objset_t *os) { - vdev_t *vd = NULL; - zfs_cache_type_t cache = os->os_secondary_cache; - blkptr_t *bp = os->os_rootbp; - - if (bp != NULL && !BP_IS_HOLE(bp)) { + if (os->os_secondary_cache == ZFS_CACHE_ALL || + os->os_secondary_cache == ZFS_CACHE_METADATA) { + if (l2arc_exclude_special == 0) + return (B_TRUE); + + blkptr_t *bp = os->os_rootbp; + if (bp == NULL || BP_IS_HOLE(bp)) + return (B_FALSE); uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); vdev_t *rvd = os->os_spa->spa_root_vdev; + vdev_t *vd = NULL; if (vdev < rvd->vdev_children) vd = rvd->vdev_child[vdev]; - if (cache == ZFS_CACHE_ALL || cache == ZFS_CACHE_METADATA) { - if (vd == NULL) - return (B_TRUE); + if (vd == NULL) + return (B_TRUE); - if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && - vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) || - l2arc_exclude_special == 0) - return (B_TRUE); - } + if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && + vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) + return (B_TRUE); } - return (B_FALSE); } @@ -1301,6 +1301,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) ASSERT3P(ds->ds_key_mapping, !=, NULL); key_mapping_rele(spa, ds->ds_key_mapping, ds); dsl_dataset_sync_done(ds, tx); + dmu_buf_rele(ds->ds_dbuf, ds); } mutex_enter(&ds->ds_lock); @@ -2408,6 +2409,13 @@ dmu_objset_id_quota_upgrade_cb(objset_t *os) dmu_objset_userobjspace_present(os)) return (SET_ERROR(ENOTSUP)); + if (dmu_objset_userobjused_enabled(os)) + 
dmu_objset_ds(os)->ds_feature_activation[ + SPA_FEATURE_USEROBJ_ACCOUNTING] = (void *)B_TRUE; + if (dmu_objset_projectquota_enabled(os)) + dmu_objset_ds(os)->ds_feature_activation[ + SPA_FEATURE_PROJECT_QUOTA] = (void *)B_TRUE; + err = dmu_objset_space_upgrade(os); if (err) return (err); diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index 7d895aab76ff..cd9ecc07fd5c 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -493,6 +493,7 @@ dmu_dump_write(dmu_send_cookie_t *dscp, dmu_object_type_t type, uint64_t object, (bp != NULL ? BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && io_compressed : lsize != psize); if (raw || compressed) { + ASSERT(bp != NULL); ASSERT(raw || dscp->dsc_featureflags & DMU_BACKUP_FEATURE_COMPRESSED); ASSERT(!BP_IS_EMBEDDED(bp)); diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c index bca881d82f87..d2985d572975 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c +++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c @@ -28,6 +28,7 @@ */ #include <sys/zfs_context.h> +#include <sys/arc_impl.h> #include <sys/dnode.h> #include <sys/dmu_objset.h> #include <sys/dmu_zfetch.h> @@ -65,13 +66,15 @@ typedef struct zfetch_stats { kstat_named_t zfetchstat_misses; kstat_named_t zfetchstat_max_streams; kstat_named_t zfetchstat_io_issued; + kstat_named_t zfetchstat_io_active; } zfetch_stats_t; static zfetch_stats_t zfetch_stats = { { "hits", KSTAT_DATA_UINT64 }, { "misses", KSTAT_DATA_UINT64 }, { "max_streams", KSTAT_DATA_UINT64 }, - { "io_issued", KSTAT_DATA_UINT64 }, + { "io_issued", KSTAT_DATA_UINT64 }, + { "io_active", KSTAT_DATA_UINT64 }, }; struct { @@ -79,6 +82,7 @@ struct { wmsum_t zfetchstat_misses; wmsum_t zfetchstat_max_streams; wmsum_t zfetchstat_io_issued; + aggsum_t zfetchstat_io_active; } zfetch_sums; #define ZFETCHSTAT_BUMP(stat) \ @@ -104,6 +108,8 @@ zfetch_kstats_update(kstat_t *ksp, int rw) 
wmsum_value(&zfetch_sums.zfetchstat_max_streams); zs->zfetchstat_io_issued.value.ui64 = wmsum_value(&zfetch_sums.zfetchstat_io_issued); + zs->zfetchstat_io_active.value.ui64 = + aggsum_value(&zfetch_sums.zfetchstat_io_active); return (0); } @@ -114,6 +120,7 @@ zfetch_init(void) wmsum_init(&zfetch_sums.zfetchstat_misses, 0); wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0); wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0); + aggsum_init(&zfetch_sums.zfetchstat_io_active, 0); zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), @@ -138,6 +145,8 @@ zfetch_fini(void) wmsum_fini(&zfetch_sums.zfetchstat_misses); wmsum_fini(&zfetch_sums.zfetchstat_max_streams); wmsum_fini(&zfetch_sums.zfetchstat_io_issued); + ASSERT0(aggsum_value(&zfetch_sums.zfetchstat_io_active)); + aggsum_fini(&zfetch_sums.zfetchstat_io_active); } /* @@ -294,6 +303,7 @@ dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued) zs->zs_more = B_TRUE; if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0) dmu_zfetch_stream_fini(zs); + aggsum_add(&zfetch_sums.zfetchstat_io_active, -1); } /* @@ -407,20 +417,28 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, * Start prefetch from the demand access size (nblks). Double the * distance every access up to zfetch_min_distance. After that only * if needed increase the distance by 1/8 up to zfetch_max_distance. + * + * Don't double the distance beyond single block if we have more + * than ~6% of ARC held by active prefetches. It should help with + * getting out of RAM on some badly mispredicted read patterns. 
*/ - unsigned int nbytes = nblks << zf->zf_dnode->dn_datablkshift; + unsigned int dbs = zf->zf_dnode->dn_datablkshift; + unsigned int nbytes = nblks << dbs; unsigned int pf_nblks; if (fetch_data) { if (unlikely(zs->zs_pf_dist < nbytes)) zs->zs_pf_dist = nbytes; - else if (zs->zs_pf_dist < zfetch_min_distance) + else if (zs->zs_pf_dist < zfetch_min_distance && + (zs->zs_pf_dist < (1 << dbs) || + aggsum_compare(&zfetch_sums.zfetchstat_io_active, + arc_c_max >> (4 + dbs)) < 0)) zs->zs_pf_dist *= 2; else if (zs->zs_more) zs->zs_pf_dist += zs->zs_pf_dist / 8; zs->zs_more = B_FALSE; if (zs->zs_pf_dist > zfetch_max_distance) zs->zs_pf_dist = zfetch_max_distance; - pf_nblks = zs->zs_pf_dist >> zf->zf_dnode->dn_datablkshift; + pf_nblks = zs->zs_pf_dist >> dbs; } else { pf_nblks = 0; } @@ -439,7 +457,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, zs->zs_ipf_dist *= 2; if (zs->zs_ipf_dist > zfetch_max_idistance) zs->zs_ipf_dist = zfetch_max_idistance; - pf_nblks = zs->zs_ipf_dist >> zf->zf_dnode->dn_datablkshift; + pf_nblks = zs->zs_ipf_dist >> dbs; if (zs->zs_ipf_start < zs->zs_pf_end) zs->zs_ipf_start = zs->zs_pf_end; if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks) @@ -509,6 +527,7 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock) dmu_zfetch_stream_fini(zs); return; } + aggsum_add(&zfetch_sums.zfetchstat_io_active, issued); if (!have_lock) rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER); diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 8e55d5447975..ed75c3bdf698 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -71,6 +71,8 @@ dnode_stats_t dnode_stats = { { "dnode_move_active", KSTAT_DATA_UINT64 }, }; +dnode_sums_t dnode_sums; + static kstat_t *dnode_ksp; static kmem_cache_t *dnode_cache; @@ -225,6 +227,72 @@ dnode_dest(void *arg, void *unused) avl_destroy(&dn->dn_dbufs); } +static int +dnode_kstats_update(kstat_t *ksp, int rw) 
+{ + dnode_stats_t *ds = ksp->ks_data; + + if (rw == KSTAT_WRITE) + return (EACCES); + ds->dnode_hold_dbuf_hold.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_dbuf_hold); + ds->dnode_hold_dbuf_read.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_dbuf_read); + ds->dnode_hold_alloc_hits.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_alloc_hits); + ds->dnode_hold_alloc_misses.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_alloc_misses); + ds->dnode_hold_alloc_interior.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_alloc_interior); + ds->dnode_hold_alloc_lock_retry.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_alloc_lock_retry); + ds->dnode_hold_alloc_lock_misses.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_alloc_lock_misses); + ds->dnode_hold_alloc_type_none.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_alloc_type_none); + ds->dnode_hold_free_hits.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_free_hits); + ds->dnode_hold_free_misses.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_free_misses); + ds->dnode_hold_free_lock_misses.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_free_lock_misses); + ds->dnode_hold_free_lock_retry.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_free_lock_retry); + ds->dnode_hold_free_refcount.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_free_refcount); + ds->dnode_hold_free_overflow.value.ui64 = + wmsum_value(&dnode_sums.dnode_hold_free_overflow); + ds->dnode_free_interior_lock_retry.value.ui64 = + wmsum_value(&dnode_sums.dnode_free_interior_lock_retry); + ds->dnode_allocate.value.ui64 = + wmsum_value(&dnode_sums.dnode_allocate); + ds->dnode_reallocate.value.ui64 = + wmsum_value(&dnode_sums.dnode_reallocate); + ds->dnode_buf_evict.value.ui64 = + wmsum_value(&dnode_sums.dnode_buf_evict); + ds->dnode_alloc_next_chunk.value.ui64 = + wmsum_value(&dnode_sums.dnode_alloc_next_chunk); + ds->dnode_alloc_race.value.ui64 = + wmsum_value(&dnode_sums.dnode_alloc_race); + ds->dnode_alloc_next_block.value.ui64 = 
+ wmsum_value(&dnode_sums.dnode_alloc_next_block); + ds->dnode_move_invalid.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_invalid); + ds->dnode_move_recheck1.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_recheck1); + ds->dnode_move_recheck2.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_recheck2); + ds->dnode_move_special.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_special); + ds->dnode_move_handle.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_handle); + ds->dnode_move_rwlock.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_rwlock); + ds->dnode_move_active.value.ui64 = + wmsum_value(&dnode_sums.dnode_move_active); + return (0); +} + void dnode_init(void) { @@ -233,11 +301,41 @@ dnode_init(void) 0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0); kmem_cache_set_move(dnode_cache, dnode_move); + wmsum_init(&dnode_sums.dnode_hold_dbuf_hold, 0); + wmsum_init(&dnode_sums.dnode_hold_dbuf_read, 0); + wmsum_init(&dnode_sums.dnode_hold_alloc_hits, 0); + wmsum_init(&dnode_sums.dnode_hold_alloc_misses, 0); + wmsum_init(&dnode_sums.dnode_hold_alloc_interior, 0); + wmsum_init(&dnode_sums.dnode_hold_alloc_lock_retry, 0); + wmsum_init(&dnode_sums.dnode_hold_alloc_lock_misses, 0); + wmsum_init(&dnode_sums.dnode_hold_alloc_type_none, 0); + wmsum_init(&dnode_sums.dnode_hold_free_hits, 0); + wmsum_init(&dnode_sums.dnode_hold_free_misses, 0); + wmsum_init(&dnode_sums.dnode_hold_free_lock_misses, 0); + wmsum_init(&dnode_sums.dnode_hold_free_lock_retry, 0); + wmsum_init(&dnode_sums.dnode_hold_free_refcount, 0); + wmsum_init(&dnode_sums.dnode_hold_free_overflow, 0); + wmsum_init(&dnode_sums.dnode_free_interior_lock_retry, 0); + wmsum_init(&dnode_sums.dnode_allocate, 0); + wmsum_init(&dnode_sums.dnode_reallocate, 0); + wmsum_init(&dnode_sums.dnode_buf_evict, 0); + wmsum_init(&dnode_sums.dnode_alloc_next_chunk, 0); + wmsum_init(&dnode_sums.dnode_alloc_race, 0); + wmsum_init(&dnode_sums.dnode_alloc_next_block, 0); + wmsum_init(&dnode_sums.dnode_move_invalid, 0); + 
wmsum_init(&dnode_sums.dnode_move_recheck1, 0); + wmsum_init(&dnode_sums.dnode_move_recheck2, 0); + wmsum_init(&dnode_sums.dnode_move_special, 0); + wmsum_init(&dnode_sums.dnode_move_handle, 0); + wmsum_init(&dnode_sums.dnode_move_rwlock, 0); + wmsum_init(&dnode_sums.dnode_move_active, 0); + dnode_ksp = kstat_create("zfs", 0, "dnodestats", "misc", KSTAT_TYPE_NAMED, sizeof (dnode_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (dnode_ksp != NULL) { dnode_ksp->ks_data = &dnode_stats; + dnode_ksp->ks_update = dnode_kstats_update; kstat_install(dnode_ksp); } } @@ -250,6 +348,35 @@ dnode_fini(void) dnode_ksp = NULL; } + wmsum_fini(&dnode_sums.dnode_hold_dbuf_hold); + wmsum_fini(&dnode_sums.dnode_hold_dbuf_read); + wmsum_fini(&dnode_sums.dnode_hold_alloc_hits); + wmsum_fini(&dnode_sums.dnode_hold_alloc_misses); + wmsum_fini(&dnode_sums.dnode_hold_alloc_interior); + wmsum_fini(&dnode_sums.dnode_hold_alloc_lock_retry); + wmsum_fini(&dnode_sums.dnode_hold_alloc_lock_misses); + wmsum_fini(&dnode_sums.dnode_hold_alloc_type_none); + wmsum_fini(&dnode_sums.dnode_hold_free_hits); + wmsum_fini(&dnode_sums.dnode_hold_free_misses); + wmsum_fini(&dnode_sums.dnode_hold_free_lock_misses); + wmsum_fini(&dnode_sums.dnode_hold_free_lock_retry); + wmsum_fini(&dnode_sums.dnode_hold_free_refcount); + wmsum_fini(&dnode_sums.dnode_hold_free_overflow); + wmsum_fini(&dnode_sums.dnode_free_interior_lock_retry); + wmsum_fini(&dnode_sums.dnode_allocate); + wmsum_fini(&dnode_sums.dnode_reallocate); + wmsum_fini(&dnode_sums.dnode_buf_evict); + wmsum_fini(&dnode_sums.dnode_alloc_next_chunk); + wmsum_fini(&dnode_sums.dnode_alloc_race); + wmsum_fini(&dnode_sums.dnode_alloc_next_block); + wmsum_fini(&dnode_sums.dnode_move_invalid); + wmsum_fini(&dnode_sums.dnode_move_recheck1); + wmsum_fini(&dnode_sums.dnode_move_recheck2); + wmsum_fini(&dnode_sums.dnode_move_special); + wmsum_fini(&dnode_sums.dnode_move_handle); + wmsum_fini(&dnode_sums.dnode_move_rwlock); + 
wmsum_fini(&dnode_sums.dnode_move_active); + kmem_cache_destroy(dnode_cache); dnode_cache = NULL; } @@ -2298,19 +2425,11 @@ dnode_spill_freed(dnode_t *dn) uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid) { - void *dp = spa_get_dsl(dn->dn_objset->os_spa); int i; if (blkid == DMU_BONUS_BLKID) return (FALSE); - /* - * If we're in the process of opening the pool, dp will not be - * set yet, but there shouldn't be anything dirty. - */ - if (dp == NULL) - return (FALSE); - if (dn->dn_free_txg) return (TRUE); @@ -2595,3 +2714,8 @@ EXPORT_SYMBOL(dnode_free_range); EXPORT_SYMBOL(dnode_evict_dbufs); EXPORT_SYMBOL(dnode_evict_bonus); #endif + +ZFS_MODULE_PARAM(zfs, zfs_, default_bs, INT, ZMOD_RW, + "Default dnode block shift"); +ZFS_MODULE_PARAM(zfs, zfs_, default_ibs, INT, ZMOD_RW, + "Default dnode indirect block shift"); diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index bf1f55e68ff5..872174f5f90d 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -2120,9 +2120,6 @@ dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype, zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); VERIFY0(zio_wait(zio)); - - /* dsl_dataset_sync_done will drop this reference. */ - dmu_buf_add_ref(ds->ds_dbuf, ds); dsl_dataset_sync_done(ds, tx); } } diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index a498b9a80753..4e5a0606fa0f 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -1259,9 +1259,6 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); VERIFY0(zio_wait(zio)); - - /* dsl_dataset_sync_done will drop this reference. 
*/ - dmu_buf_add_ref(ds->ds_dbuf, ds); dsl_dataset_sync_done(ds, tx); } } @@ -1747,25 +1744,6 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, } } - /* - * We are not allowed to dirty a filesystem when done receiving - * a snapshot. In this case some flags such as SPA_FEATURE_LARGE_BLOCKS - * will not be set and a subsequent encrypted raw send will fail. Hence - * activate this feature if needed here. This needs to happen only in - * syncing context. - */ - if (dmu_tx_is_syncing(tx)) { - for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { - if (zfeature_active(f, ds->ds_feature_activation[f]) && - !(zfeature_active(f, ds->ds_feature[f]))) { - dsl_dataset_activate_feature(dsobj, f, - ds->ds_feature_activation[f], tx); - ds->ds_feature[f] = - ds->ds_feature_activation[f]; - } - } - } - ASSERT3U(ds->ds_prev != 0, ==, dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); if (ds->ds_prev) { @@ -2108,16 +2086,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) } dmu_objset_sync(ds->ds_objset, zio, tx); - - for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { - if (zfeature_active(f, ds->ds_feature_activation[f])) { - if (zfeature_active(f, ds->ds_feature[f])) - continue; - dsl_dataset_activate_feature(ds->ds_object, f, - ds->ds_feature_activation[f], tx); - ds->ds_feature[f] = ds->ds_feature_activation[f]; - } - } } /* @@ -2289,9 +2257,18 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) else ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]); - ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); + for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { + if (zfeature_active(f, + ds->ds_feature_activation[f])) { + if (zfeature_active(f, ds->ds_feature[f])) + continue; + dsl_dataset_activate_feature(ds->ds_object, f, + ds->ds_feature_activation[f], tx); + ds->ds_feature[f] = ds->ds_feature_activation[f]; + } + } - dmu_buf_rele(ds->ds_dbuf, ds); + ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); } int diff --git 
a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c index 7681b735ec70..d5fe2ee56804 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c @@ -438,6 +438,18 @@ dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, } } +/* + * Prefetch metadata required for dle_enqueue_subobj(). + */ +static void +dle_prefetch_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, + uint64_t obj) +{ + if (dle->dle_bpobj.bpo_object != + dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) + bpobj_prefetch_subobj(&dle->dle_bpobj, obj); +} + void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) @@ -809,6 +821,27 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth, dle_enqueue_subobj(dl, dle, obj, tx); } +/* + * Prefetch metadata required for dsl_deadlist_insert_bpobj(). + */ +static void +dsl_deadlist_prefetch_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; + avl_index_t where; + + ASSERT(MUTEX_HELD(&dl->dl_lock)); + + dsl_deadlist_load_tree(dl); + + dle_tofind.dle_mintxg = birth; + dle = avl_find(&dl->dl_tree, &dle_tofind, &where); + if (dle == NULL) + dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); + dle_prefetch_subobj(dl, dle, obj); +} + static int dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) @@ -825,12 +858,12 @@ dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) { - zap_cursor_t zc; - zap_attribute_t za; + zap_cursor_t zc, pzc; + zap_attribute_t za, pza; dmu_buf_t *bonus; dsl_deadlist_phys_t *dlp; dmu_object_info_t doi; - int error; + int error, perror, i; VERIFY0(dmu_object_info(dl->dl_os, obj, &doi)); if (doi.doi_type == DMU_OT_BPOBJ) { @@ -842,15 +875,32 @@ dsl_deadlist_merge(dsl_deadlist_t 
*dl, uint64_t obj, dmu_tx_t *tx) } mutex_enter(&dl->dl_lock); + /* + * Prefetch up to 128 deadlists first and then more as we progress. + * The limit is a balance between ARC use and diminishing returns. + */ + for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0; + (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128; + zap_cursor_advance(&pzc), i++) { + dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer, + zfs_strtonum(pza.za_name, NULL)); + } for (zap_cursor_init(&zc, dl->dl_os, obj); (error = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { uint64_t mintxg = zfs_strtonum(za.za_name, NULL); dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx)); + if (perror == 0) { + dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer, + zfs_strtonum(pza.za_name, NULL)); + zap_cursor_advance(&pzc); + perror = zap_cursor_retrieve(&pzc, &pza); + } } VERIFY3U(error, ==, ENOENT); zap_cursor_fini(&zc); + zap_cursor_fini(&pzc); VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus)); dlp = bonus->db_data; @@ -868,8 +918,9 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, dmu_tx_t *tx) { dsl_deadlist_entry_t dle_tofind; - dsl_deadlist_entry_t *dle; + dsl_deadlist_entry_t *dle, *pdle; avl_index_t where; + int i; ASSERT(!dl->dl_oldfmt); @@ -881,11 +932,23 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, dle = avl_find(&dl->dl_tree, &dle_tofind, &where); if (dle == NULL) dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER); + /* + * Prefetch up to 128 deadlists first and then more as we progress. + * The limit is a balance between ARC use and diminishing returns. 
+ */ + for (pdle = dle, i = 0; pdle && i < 128; i++) { + bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object); + pdle = AVL_NEXT(&dl->dl_tree, pdle); + } while (dle) { uint64_t used, comp, uncomp; dsl_deadlist_entry_t *dle_next; bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx); + if (pdle) { + bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object); + pdle = AVL_NEXT(&dl->dl_tree, pdle); + } VERIFY0(bpobj_space(&dle->dle_bpobj, &used, &comp, &uncomp)); diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c index 4036c8671f2d..277560aabfd1 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_pool.c +++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c @@ -786,6 +786,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) } dsl_dataset_sync_done(ds, tx); + dmu_buf_rele(ds->ds_dbuf, ds); } while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index 603fe84ecd04..f3c639b0d04e 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -2005,6 +2005,21 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, return; } + /* + * Check if this block contradicts any filesystem flags. 
+ */ + spa_feature_t f = SPA_FEATURE_LARGE_BLOCKS; + if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) + ASSERT(dsl_dataset_feature_is_active(ds, f)); + + f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp)); + if (f != SPA_FEATURE_NONE) + ASSERT(dsl_dataset_feature_is_active(ds, f)); + + f = zio_compress_to_feature(BP_GET_COMPRESS(bp)); + if (f != SPA_FEATURE_NONE) + ASSERT(dsl_dataset_feature_is_active(ds, f)); + if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) { scn->scn_lt_min_this_txg++; return; diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index c9759f35a6fb..1ed79eed3e8b 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -909,7 +909,16 @@ spa_change_guid(spa_t *spa) spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); if (error == 0) { - spa_write_cachefile(spa, B_FALSE, B_TRUE); + /* + * Clear the kobj flag from all the vdevs to allow + * vdev_cache_process_kobj_evt() to post events to all the + * vdevs since GUID is updated. 
+ */ + vdev_clear_kobj_evt(spa->spa_root_vdev); + for (int i = 0; i < spa->spa_l2cache.sav_count; i++) + vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]); + + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); } @@ -5192,7 +5201,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, */ spa_unload(spa); spa_deactivate(spa); - spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE); spa_remove(spa); if (locked) mutex_exit(&spa_namespace_lock); @@ -5327,13 +5336,15 @@ spa_add_spares(spa_t *spa, nvlist_t *config) for (i = 0; i < nspares; i++) { guid = fnvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID); + VERIFY0(nvlist_lookup_uint64_array(spares[i], + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)); if (spa_spare_exists(guid, &pool, NULL) && pool != 0ULL) { - VERIFY0(nvlist_lookup_uint64_array(spares[i], - ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, - &vsc)); vs->vs_state = VDEV_STATE_CANT_OPEN; vs->vs_aux = VDEV_AUX_SPARED; + } else { + vs->vs_state = + spa->spa_spares.sav_vdevs[i]->vdev_state; } } } @@ -6012,7 +6023,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_spawn_aux_threads(spa); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE); /* * Don't count references from objsets that are already closed @@ -6073,7 +6084,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) if (props != NULL) spa_configfile_set(spa, props, B_FALSE); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); zfs_dbgmsg("spa_import: verbatim import of %s", pool); mutex_exit(&spa_namespace_lock); @@ -6465,7 +6476,7 @@ export_spa: if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) - spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_write_cachefile(spa, B_TRUE, B_TRUE, 
B_FALSE); spa_remove(spa); } else { /* diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c index ad82932ce567..c4282b0cf3a8 100644 --- a/sys/contrib/openzfs/module/zfs/spa_config.c +++ b/sys/contrib/openzfs/module/zfs/spa_config.c @@ -238,7 +238,8 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) * would be required. */ void -spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) +spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent, + boolean_t postblkidevent) { spa_config_dirent_t *dp, *tdp; nvlist_t *nvl; @@ -344,6 +345,18 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) if (postsysevent) spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC); + + /* + * Post udev event to sync blkid information if the pool is created + * or a new vdev is added to the pool. + */ + if ((target->spa_root_vdev) && postblkidevent) { + vdev_post_kobj_evt(target->spa_root_vdev); + for (int i = 0; i < target->spa_l2cache.sav_count; i++) + vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]); + for (int i = 0; i < target->spa_spares.sav_count; i++) + vdev_post_kobj_evt(target->spa_spares.sav_vdevs[i]); + } } /* @@ -598,6 +611,7 @@ spa_config_update(spa_t *spa, int what) */ if (!spa->spa_is_root) { spa_write_cachefile(spa, B_FALSE, + what != SPA_CONFIG_UPDATE_POOL, what != SPA_CONFIG_UPDATE_POOL); } diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 1c93e7487dda..a57f0727db31 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -1291,7 +1291,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag) * If the config changed, update the config cache. 
*/ if (config_changed) - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE); } /* @@ -1386,7 +1386,7 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) */ if (config_changed) { mutex_enter(&spa_namespace_lock); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); mutex_exit(&spa_namespace_lock); } diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 00773f89cf6e..4b9d7e7c0506 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -1938,6 +1938,14 @@ vdev_open(vdev_t *vd) error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &logical_ashift, &physical_ashift); + + /* Keep the device in removed state if unplugged */ + if (error == ENOENT && vd->vdev_removed) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED, + VDEV_AUX_NONE); + return (error); + } + /* * Physical volume size should never be larger than its max size, unless * the disk has shrunk while we were reading it or the device is buggy @@ -3156,6 +3164,34 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, mutex_exit(&vd->vdev_dtl_lock); } +/* + * Iterate over all the vdevs except spare, and post kobj events + */ +void +vdev_post_kobj_evt(vdev_t *vd) +{ + if (vd->vdev_ops->vdev_op_kobj_evt_post && + vd->vdev_kobj_flag == B_FALSE) { + vd->vdev_kobj_flag = B_TRUE; + vd->vdev_ops->vdev_op_kobj_evt_post(vd); + } + + for (int c = 0; c < vd->vdev_children; c++) + vdev_post_kobj_evt(vd->vdev_child[c]); +} + +/* + * Iterate over all the vdevs except spare, and clear kobj events + */ +void +vdev_clear_kobj_evt(vdev_t *vd) +{ + vd->vdev_kobj_flag = B_FALSE; + + for (int c = 0; c < vd->vdev_children; c++) + vdev_clear_kobj_evt(vd->vdev_child[c]); +} + int vdev_dtl_load(vdev_t *vd) { @@ -3936,6 +3972,29 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux) return (spa_vdev_state_exit(spa, vd, 0)); } +int 
+vdev_remove_wanted(spa_t *spa, uint64_t guid) +{ + vdev_t *vd; + + spa_vdev_state_enter(spa, SCL_NONE); + + if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV))); + + /* + * If the vdev is already removed, then don't do anything. + */ + if (vd->vdev_removed) + return (spa_vdev_state_exit(spa, NULL, 0)); + + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(spa, SPA_ASYNC_REMOVE); + + return (spa_vdev_state_exit(spa, vd, 0)); +} + + /* * Online the given vdev. * diff --git a/sys/contrib/openzfs/module/zfs/zap_leaf.c b/sys/contrib/openzfs/module/zfs/zap_leaf.c index aad923d512df..fc25344ea8a8 100644 --- a/sys/contrib/openzfs/module/zfs/zap_leaf.c +++ b/sys/contrib/openzfs/module/zfs/zap_leaf.c @@ -645,7 +645,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, * form of the name. But all callers have one of these on hand anyway, * so might as well take advantage. A cleaner but slower interface * would accept neither argument, and compute the normalized name as - * needed (using zap_name_alloc(zap_entry_read_name(zeh))). + * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))). 
*/ boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, @@ -666,7 +666,7 @@ zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, continue; if (zn == NULL) { - zn = zap_name_alloc(zap, name, MT_NORMALIZE); + zn = zap_name_alloc_str(zap, name, MT_NORMALIZE); allocdzn = B_TRUE; } if (zap_leaf_array_match(zeh->zeh_leaf, zn, diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c index 516d46ac7f31..e3dadf130413 100644 --- a/sys/contrib/openzfs/module/zfs/zap_micro.c +++ b/sys/contrib/openzfs/module/zfs/zap_micro.c @@ -33,7 +33,7 @@ #include <sys/zap.h> #include <sys/zap_impl.h> #include <sys/zap_leaf.h> -#include <sys/avl.h> +#include <sys/btree.h> #include <sys/arc.h> #include <sys/dmu_objset.h> @@ -92,7 +92,7 @@ zap_hash(zap_name_t *zn) wp++, i++) { uint64_t word = *wp; - for (int j = 0; j < zn->zn_key_intlen; j++) { + for (int j = 0; j < 8; j++) { h = (h >> 8) ^ zfs_crc64_table[(h ^ word) & 0xFF]; word >>= NBBY; @@ -162,18 +162,25 @@ zap_match(zap_name_t *zn, const char *matchname) } } +static zap_name_t * +zap_name_alloc(zap_t *zap) +{ + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + zn->zn_zap = zap; + return (zn); +} + void zap_name_free(zap_name_t *zn) { kmem_free(zn, sizeof (zap_name_t)); } -zap_name_t * -zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) +static int +zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + zap_t *zap = zn->zn_zap; - zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = key; zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; @@ -194,17 +201,13 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) * what the hash is computed from. 
*/ if (zap_normalize(zap, key, zn->zn_normbuf, - zap->zap_normflags) != 0) { - zap_name_free(zn); - return (NULL); - } + zap->zap_normflags) != 0) + return (SET_ERROR(ENOTSUP)); zn->zn_key_norm = zn->zn_normbuf; zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } else { - if (mt != 0) { - zap_name_free(zn); - return (NULL); - } + if (mt != 0) + return (SET_ERROR(ENOTSUP)); zn->zn_key_norm = zn->zn_key_orig; zn->zn_key_norm_numints = zn->zn_key_orig_numints; } @@ -217,13 +220,22 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) * what the matching is based on. (Not the hash!) */ if (zap_normalize(zap, key, zn->zn_normbuf, - zn->zn_normflags) != 0) { - zap_name_free(zn); - return (NULL); - } + zn->zn_normflags) != 0) + return (SET_ERROR(ENOTSUP)); zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } + return (0); +} + +zap_name_t * +zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt) +{ + zap_name_t *zn = zap_name_alloc(zap); + if (zap_name_init_str(zn, key, mt) != 0) { + zap_name_free(zn); + return (NULL); + } return (zn); } @@ -277,45 +289,46 @@ mze_compare(const void *arg1, const void *arg2) const mzap_ent_t *mze1 = arg1; const mzap_ent_t *mze2 = arg2; - int cmp = TREE_CMP(mze1->mze_hash, mze2->mze_hash); - if (likely(cmp)) - return (cmp); - - return (TREE_CMP(mze1->mze_cd, mze2->mze_cd)); + return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd, + (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd)); } static void -mze_insert(zap_t *zap, int chunkid, uint64_t hash) +mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash) { + mzap_ent_t mze; + ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); - mze->mze_chunkid = chunkid; - mze->mze_hash = hash; - mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; - ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); - avl_add(&zap->zap_m.zap_avl, mze); + mze.mze_chunkid = chunkid; + ASSERT0(hash & 0xffffffff); + mze.mze_hash 
= hash >> 32; + ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff); + mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd; + ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0); + zfs_btree_add(&zap->zap_m.zap_tree, &mze); } static mzap_ent_t * -mze_find(zap_name_t *zn) +mze_find(zap_name_t *zn, zfs_btree_index_t *idx) { mzap_ent_t mze_tofind; mzap_ent_t *mze; - avl_index_t idx; - avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; + zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree; ASSERT(zn->zn_zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); - mze_tofind.mze_hash = zn->zn_hash; + ASSERT0(zn->zn_hash & 0xffffffff); + mze_tofind.mze_hash = zn->zn_hash >> 32; mze_tofind.mze_cd = 0; - mze = avl_find(avl, &mze_tofind, &idx); + mze = zfs_btree_find(tree, &mze_tofind, idx); if (mze == NULL) - mze = avl_nearest(avl, idx, AVL_AFTER); - for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { + mze = zfs_btree_next(tree, idx, idx); + for (; mze && mze->mze_hash == mze_tofind.mze_hash; + mze = zfs_btree_next(tree, idx, idx)) { ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) return (mze); @@ -328,18 +341,21 @@ static uint32_t mze_find_unused_cd(zap_t *zap, uint64_t hash) { mzap_ent_t mze_tofind; - avl_index_t idx; - avl_tree_t *avl = &zap->zap_m.zap_avl; + zfs_btree_index_t idx; + zfs_btree_t *tree = &zap->zap_m.zap_tree; ASSERT(zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + ASSERT0(hash & 0xffffffff); + hash >>= 32; mze_tofind.mze_hash = hash; mze_tofind.mze_cd = 0; uint32_t cd = 0; - for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx); - mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { + for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx); + mze && mze->mze_hash == hash; + mze = zfs_btree_next(tree, &idx, &idx)) { if (mze->mze_cd != cd) break; cd++; @@ -364,16 +380,18 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) { zap_t *zap = zn->zn_zap; 
mzap_ent_t mze_tofind; - mzap_ent_t *mze; - avl_index_t idx; - avl_tree_t *avl = &zap->zap_m.zap_avl; + zfs_btree_index_t idx; + zfs_btree_t *tree = &zap->zap_m.zap_tree; uint32_t mzap_ents = 0; + ASSERT0(hash & 0xffffffff); + hash >>= 32; mze_tofind.mze_hash = hash; mze_tofind.mze_cd = 0; - for (mze = avl_find(avl, &mze_tofind, &idx); - mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { + for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx); + mze && mze->mze_hash == hash; + mze = zfs_btree_next(tree, &idx, &idx)) { mzap_ents++; } @@ -384,24 +402,10 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) } static void -mze_remove(zap_t *zap, mzap_ent_t *mze) -{ - ASSERT(zap->zap_ismicro); - ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - - avl_remove(&zap->zap_m.zap_avl, mze); - kmem_free(mze, sizeof (mzap_ent_t)); -} - -static void mze_destroy(zap_t *zap) { - mzap_ent_t *mze; - void *avlcookie = NULL; - - while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) - kmem_free(mze, sizeof (mzap_ent_t)); - avl_destroy(&zap->zap_m.zap_avl); + zfs_btree_clear(&zap->zap_m.zap_tree); + zfs_btree_destroy(&zap->zap_m.zap_tree); } static zap_t * @@ -448,21 +452,26 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) zap->zap_salt = zap_m_phys(zap)->mz_salt; zap->zap_normflags = zap_m_phys(zap)->mz_normflags; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; - avl_create(&zap->zap_m.zap_avl, mze_compare, - sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); - for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) { + /* + * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove() + * overhead on massive inserts below. It still allows to store + * 62 entries before we have to add 2KB B-tree core node. 
+ */ + zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare, + sizeof (mzap_ent_t), 512); + + zap_name_t *zn = zap_name_alloc(zap); + for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; if (mze->mze_name[0]) { - zap_name_t *zn; - zap->zap_m.zap_num_entries++; - zn = zap_name_alloc(zap, mze->mze_name, 0); + zap_name_init_str(zn, mze->mze_name, 0); mze_insert(zap, i, zn->zn_hash); - zap_name_free(zn); } } + zap_name_free(zn); } else { zap->zap_salt = zap_f_phys(zap)->zap_salt; zap->zap_normflags = zap_f_phys(zap)->zap_normflags; @@ -655,24 +664,25 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags) dprintf("upgrading obj=%llu with %u chunks\n", (u_longlong_t)zap->zap_object, nchunks); - /* XXX destroy the avl later, so we can use the stored hash value */ + /* XXX destroy the tree later, so we can use the stored hash value */ mze_destroy(zap); fzap_upgrade(zap, tx, flags); + zap_name_t *zn = zap_name_alloc(zap); for (int i = 0; i < nchunks; i++) { mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; if (mze->mze_name[0] == 0) continue; dprintf("adding %s=%llu\n", mze->mze_name, (u_longlong_t)mze->mze_value); - zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0); + zap_name_init_str(zn, mze->mze_name, 0); /* If we fail here, we would end up losing entries */ VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tag, tx)); zap = zn->zn_zap; /* fzap_add_cd() may change zap */ - zap_name_free(zn); } + zap_name_free(zn); vmem_free(mzp, sz); *zapp = zap; return (0); @@ -914,22 +924,23 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) * See also the comment above zap_entry_normalization_conflict(). 
*/ static boolean_t -mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) +mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze, + zfs_btree_index_t *idx) { - int direction = AVL_BEFORE; boolean_t allocdzn = B_FALSE; + mzap_ent_t *other; + zfs_btree_index_t oidx; if (zap->zap_normflags == 0) return (B_FALSE); -again: - for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction); + for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx); other && other->mze_hash == mze->mze_hash; - other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { + other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) { if (zn == NULL) { - zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, - MT_NORMALIZE); + zn = zap_name_alloc_str(zap, + MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); allocdzn = B_TRUE; } if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { @@ -939,9 +950,20 @@ again: } } - if (direction == AVL_BEFORE) { - direction = AVL_AFTER; - goto again; + for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx); + other && other->mze_hash == mze->mze_hash; + other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) { + + if (zn == NULL) { + zn = zap_name_alloc_str(zap, + MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); + allocdzn = B_TRUE; + } + if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { + if (allocdzn) + zap_name_free(zn); + return (B_TRUE); + } } if (allocdzn) @@ -969,7 +991,7 @@ zap_lookup_impl(zap_t *zap, const char *name, { int err = 0; - zap_name_t *zn = zap_name_alloc(zap, name, mt); + zap_name_t *zn = zap_name_alloc_str(zap, name, mt); if (zn == NULL) return (SET_ERROR(ENOTSUP)); @@ -977,7 +999,8 @@ zap_lookup_impl(zap_t *zap, const char *name, err = fzap_lookup(zn, integer_size, num_integers, buf, realname, rn_len, ncp); } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); } else { @@ -994,7 +1017,7 @@ 
zap_lookup_impl(zap_t *zap, const char *name, rn_len); if (ncp) { *ncp = mzap_normalization_conflict(zap, - zn, mze); + zn, mze, &idx); } } } @@ -1031,7 +1054,7 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name) err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); if (err) return (err); - zn = zap_name_alloc(zap, name, 0); + zn = zap_name_alloc_str(zap, name, 0); if (zn == NULL) { zap_unlockdir(zap, FTAG); return (SET_ERROR(ENOTSUP)); @@ -1134,7 +1157,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name, zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); if (err != 0) return (err); - zap_name_t *zn = zap_name_alloc(zap, name, 0); + zap_name_t *zn = zap_name_alloc_str(zap, name, 0); if (zn == NULL) { zap_unlockdir(zap, FTAG); return (SET_ERROR(ENOTSUP)); @@ -1142,7 +1165,8 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name, if (!zap->zap_ismicro) { err = fzap_length(zn, integer_size, num_integers); } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); } else { @@ -1182,7 +1206,7 @@ static void mzap_addent(zap_name_t *zn, uint64_t value) { zap_t *zap = zn->zn_zap; - int start = zap->zap_m.zap_alloc_next; + uint16_t start = zap->zap_m.zap_alloc_next; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); @@ -1198,7 +1222,7 @@ mzap_addent(zap_name_t *zn, uint64_t value) ASSERT(cd < zap_maxcd(zap)); again: - for (int i = start; i < zap->zap_m.zap_num_chunks; i++) { + for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; if (mze->mze_name[0] == 0) { mze->mze_value = value; @@ -1229,7 +1253,7 @@ zap_add_impl(zap_t *zap, const char *key, const uint64_t *intval = val; int err = 0; - zap_name_t *zn = zap_name_alloc(zap, key, 0); + zap_name_t *zn = zap_name_alloc_str(zap, key, 0); if (zn == NULL) { zap_unlockdir(zap, tag); return (SET_ERROR(ENOTSUP)); @@ 
-1247,7 +1271,8 @@ zap_add_impl(zap_t *zap, const char *key, } zap = zn->zn_zap; /* fzap_add() may change zap */ } else { - if (mze_find(zn) != NULL) { + zfs_btree_index_t idx; + if (mze_find(zn, &idx) != NULL) { err = SET_ERROR(EEXIST); } else { mzap_addent(zn, *intval); @@ -1327,7 +1352,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); if (err != 0) return (err); - zap_name_t *zn = zap_name_alloc(zap, name, 0); + zap_name_t *zn = zap_name_alloc_str(zap, name, 0); if (zn == NULL) { zap_unlockdir(zap, FTAG); return (SET_ERROR(ENOTSUP)); @@ -1348,7 +1373,8 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, } zap = zn->zn_zap; /* fzap_update() may change zap */ } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze != NULL) { MZE_PHYS(zap, mze)->mze_value = *intval; } else { @@ -1398,20 +1424,20 @@ zap_remove_impl(zap_t *zap, const char *name, { int err = 0; - zap_name_t *zn = zap_name_alloc(zap, name, mt); + zap_name_t *zn = zap_name_alloc_str(zap, name, mt); if (zn == NULL) return (SET_ERROR(ENOTSUP)); if (!zap->zap_ismicro) { err = fzap_remove(zn, tx); } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); } else { zap->zap_m.zap_num_entries--; - bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], - sizeof (mzap_ent_phys_t)); - mze_remove(zap, mze); + memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t)); + zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx); } } zap_name_free(zn); @@ -1582,29 +1608,30 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_retrieve(zc->zc_zap, zc, za); } else { - avl_index_t idx; + zfs_btree_index_t idx; mzap_ent_t mze_tofind; - mze_tofind.mze_hash = zc->zc_hash; + mze_tofind.mze_hash = zc->zc_hash >> 32; mze_tofind.mze_cd = 
zc->zc_cd; - mzap_ent_t *mze = - avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); + mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree, + &mze_tofind, &idx); if (mze == NULL) { - mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, - idx, AVL_AFTER); + mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree, + &idx, &idx); } if (mze) { mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); za->za_normalization_conflict = - mzap_normalization_conflict(zc->zc_zap, NULL, mze); + mzap_normalization_conflict(zc->zc_zap, NULL, + mze, &idx); za->za_integer_length = 8; za->za_num_integers = 1; za->za_first_integer = mzep->mze_value; (void) strlcpy(za->za_name, mzep->mze_name, sizeof (za->za_name)); - zc->zc_hash = mze->mze_hash; + zc->zc_hash = (uint64_t)mze->mze_hash << 32; zc->zc_cd = mze->mze_cd; err = 0; } else { diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 4601ef52788a..a4b391cbea12 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -1921,6 +1921,10 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc) error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); break; + case VDEV_STATE_REMOVED: + error = vdev_remove_wanted(spa, zc->zc_guid); + break; + default: error = SET_ERROR(EINVAL); } @@ -2928,7 +2932,7 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc) mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); } mutex_exit(&spa_namespace_lock); if (spa != NULL) { diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 918938d62823..b9498d17ee2f 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -105,7 +105,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) if 
(zn_has_cached_data(zp)) zn_flush_cached_data(zp, B_FALSE); - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); zfs_rangelock_exit(lr); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index 11e05e477839..aaf509a2fc73 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -92,6 +92,14 @@ int zfs_commit_timeout_pct = 5; /* + * Minimal time we care to delay commit waiting for more ZIL records. + * At least FreeBSD kernel can't sleep for less than 2us at its best. + * So requests to sleep for less then 5us is a waste of CPU time with + * a risk of significant log latency increase due to oversleep. + */ +static unsigned long zil_min_commit_timeout = 5000; + +/* * See zil.h for more information about these fields. */ zil_stats_t zil_stats = { @@ -1155,7 +1163,8 @@ zil_lwb_flush_vdevs_done(zio_t *zio) lwb->lwb_tx = NULL; ASSERT3U(lwb->lwb_issued_timestamp, >, 0); - zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp; + zilog->zl_last_lwb_latency = (zilog->zl_last_lwb_latency * 3 + + gethrtime() - lwb->lwb_issued_timestamp) / 4; lwb->lwb_root_zio = NULL; @@ -2283,8 +2292,9 @@ zil_process_commit_list(zilog_t *zilog) spa_t *spa = zilog->zl_spa; list_t nolwb_itxs; list_t nolwb_waiters; - lwb_t *lwb; + lwb_t *lwb, *plwb; itx_t *itx; + boolean_t first = B_TRUE; ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); @@ -2306,6 +2316,9 @@ zil_process_commit_list(zilog_t *zilog) ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); + first = (lwb->lwb_state != LWB_STATE_OPENED) && + ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) == NULL || + plwb->lwb_state == LWB_STATE_FLUSH_DONE); } while ((itx = list_head(&zilog->zl_itx_commit_list)) != NULL) { 
@@ -2476,7 +2489,23 @@ zil_process_commit_list(zilog_t *zilog) * try and pack as many itxs into as few lwbs as * possible, without significantly impacting the latency * of each individual itx. + * + * If we had no already running or open LWBs, it can be + * the workload is single-threaded. And if the ZIL write + * latency is very small or if the LWB is almost full, it + * may be cheaper to bypass the delay. */ + if (lwb->lwb_state == LWB_STATE_OPENED && first) { + hrtime_t sleep = zilog->zl_last_lwb_latency * + zfs_commit_timeout_pct / 100; + if (sleep < zil_min_commit_timeout || + lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) { + lwb = zil_lwb_write_issue(zilog, lwb); + zilog->zl_cur_used = 0; + if (lwb == NULL) + zil_commit_writer_stall(zilog); + } + } } } @@ -3726,6 +3755,9 @@ EXPORT_SYMBOL(zil_set_logbias); ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, INT, ZMOD_RW, "ZIL block open timeout percentage"); +ZFS_MODULE_PARAM(zfs_zil, zil_, min_commit_timeout, ULONG, ZMOD_RW, + "Minimum delay we care for ZIL block commit"); + ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW, "Disable intent logging replay"); diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index c1fd2de2e586..700f8791045f 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -3928,7 +3928,7 @@ zio_vdev_io_done(zio_t *zio) ops->vdev_op_io_done(zio); - if (unexpected_error) + if (unexpected_error && vd->vdev_remove_wanted == B_FALSE) VERIFY(vdev_probe(vd, zio) == NULL); return (zio); diff --git a/sys/contrib/openzfs/module/zfs/zrlock.c b/sys/contrib/openzfs/module/zfs/zrlock.c index a4def6053622..8b6755bc9360 100644 --- a/sys/contrib/openzfs/module/zfs/zrlock.c +++ b/sys/contrib/openzfs/module/zfs/zrlock.c @@ -106,16 +106,16 @@ zrl_add_impl(zrlock_t *zrl, const char *zc) void zrl_remove(zrlock_t *zrl) { - uint32_t n; - #ifdef ZFS_DEBUG if (zrl->zr_owner == curthread) { zrl->zr_owner = NULL; zrl->zr_caller 
= NULL; } + int32_t n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount); + ASSERT3S(n, >=, 0); +#else + atomic_dec_32((uint32_t *)&zrl->zr_refcount); #endif - n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount); - ASSERT3S((int32_t)n, >=, 0); } int diff --git a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in index 920b90e88912..22beb6b68ae3 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in @@ -1,6 +1,6 @@ %{?!packager: %define packager Brian Behlendorf <behlendorf1@llnl.gov>} -%if ! 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version} +%if ! 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}%{?openEuler} %define not_rpm 1 %endif @@ -28,7 +28,7 @@ Requires(post): dkms >= 2.2.0.3 Requires(preun): dkms >= 2.2.0.3 Requires: gcc, make, perl, diffutils Requires(post): gcc, make, perl, diffutils -%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version} +%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}%{?openEuler} Requires: kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999 Requires(post): kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999 Obsoletes: spl-dkms <= %{version} @@ -36,7 +36,7 @@ Obsoletes: spl-dkms <= %{version} Provides: %{module}-kmod = %{version} AutoReqProv: no -%if (0%{?fedora}%{?suse_version}) || (0%{?rhel} && 0%{?rhel} < 9) +%if (0%{?fedora}%{?suse_version}%{?openEuler}) || (0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 diff --git a/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in b/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in index 3061fb6ade26..0093e49c1e37 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in @@ -1,7 +1,7 @@ %define module @PACKAGE@ %if !%{defined ksrc} -%if 0%{?rhel}%{?fedora} +%if 0%{?rhel}%{?fedora}%{?openEuler} 
%define ksrc ${kernel_version##*___} %else %define ksrc "$( \ @@ -16,7 +16,7 @@ %endif %if !%{defined kobj} -%if 0%{?rhel}%{?fedora} +%if 0%{?rhel}%{?fedora}%{?openEuler} %define kobj ${kernel_version##*___} %else %define kobj "$( \ @@ -52,12 +52,12 @@ URL: https://github.com/openzfs/zfs Source0: %{module}-%{version}.tar.gz Source10: kmodtool BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id} -u -n) -%if 0%{?rhel}%{?fedora} +%if 0%{?rhel}%{?fedora}%{?openEuler} BuildRequires: gcc, make BuildRequires: elfutils-libelf-devel %endif -%if (0%{?fedora}%{?suse_version}) || (0%{?rhel} && 0%{?rhel} < 9) +%if (0%{?fedora}%{?suse_version}%{?openEuler}) || (0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 @@ -79,10 +79,11 @@ BuildRequires: %{_bindir}/kmodtool # Building local packages attempt to to use the installed kernel. %{?rhel:BuildRequires: kernel-devel} %{?fedora:BuildRequires: kernel-devel} +%{?openEuler:BuildRequires: kernel-devel} %{?suse_version:BuildRequires: kernel-source} %if !%{defined kernels} && !%{defined build_src_rpm} - %if 0%{?rhel}%{?fedora}%{?suse_version} + %if 0%{?rhel}%{?fedora}%{?suse_version}%{?openEuler} %define kernels %(ls -1 /usr/src/kernels) %else %define kernels %(ls -1 /lib/modules) diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in index 8cab1c3d70bb..3dce92acbbf1 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in @@ -3,7 +3,7 @@ # Set the default udev directory based on distribution. %if %{undefined _udevdir} -%if 0%{?fedora} >= 17 || 0%{?rhel} >= 7 || 0%{?centos} >= 7 +%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler} %global _udevdir %{_prefix}/lib/udev %else %global _udevdir /lib/udev @@ -12,7 +12,7 @@ # Set the default udevrule directory based on distribution. 
%if %{undefined _udevruledir} -%if 0%{?fedora} >= 17 || 0%{?rhel} >= 7 || 0%{?centos} >= 7 +%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler} %global _udevruledir %{_prefix}/lib/udev/rules.d %else %global _udevruledir /lib/udev/rules.d @@ -21,7 +21,7 @@ # Set the default dracut directory based on distribution. %if %{undefined _dracutdir} -%if 0%{?fedora} >= 17 || 0%{?rhel} >= 7 || 0%{?centos} >= 7 +%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler} %global _dracutdir %{_prefix}/lib/dracut %else %global _dracutdir %{_prefix}/share/dracut @@ -57,59 +57,28 @@ %bcond_with asan %bcond_with systemd %bcond_with pam +%bcond_without pyzfs # Generic enable switch for systemd %if %{with systemd} %define _systemd 1 %endif -# RHEL >= 7 comes with systemd -%if 0%{?rhel} >= 7 +# Distros below support systemd +%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version} %define _systemd 1 %endif -# Fedora >= 15 comes with systemd, but only >= 18 has -# the proper macros -%if 0%{?fedora} >= 18 -%define _systemd 1 -%endif - -# opensuse >= 12.1 comes with systemd, but only >= 13.1 -# has the proper macros -%if 0%{?suse_version} >= 1310 -%define _systemd 1 -%endif - -# When not specified default to distribution provided version. This -# is normally Python 3, but for RHEL <= 7 only Python 2 is provided. +# When not specified default to distribution provided version. 
%if %{undefined __use_python} -%if 0%{?rhel} && 0%{?rhel} <= 7 -%define __python /usr/bin/python2 -%define __python_pkg_version 2 -%define __python_cffi_pkg python-cffi -%define __python_setuptools_pkg python-setuptools -%else %define __python /usr/bin/python3 %define __python_pkg_version 3 -%define __python_cffi_pkg python3-cffi -%define __python_setuptools_pkg python3-setuptools -%endif %else %define __python %{__use_python} %define __python_pkg_version %{__use_python_pkg_version} -%define __python_cffi_pkg python%{__python_pkg_version}-cffi -%define __python_setuptools_pkg python%{__python_pkg_version}-setuptools %endif %define __python_sitelib %(%{__python} -Esc "from distutils.sysconfig import get_python_lib; print(get_python_lib())") -# By default python-pyzfs is enabled, with the exception of -# RHEL 6 which by default uses Python 2.6 which is too old. -%if 0%{?rhel} == 6 -%bcond_with pyzfs -%else -%bcond_without pyzfs -%endif - Name: @PACKAGE@ Version: @VERSION@ Release: @RELEASE@%{?dist} @@ -132,7 +101,7 @@ Obsoletes: spl <= %{version} # Renaming those on either side would conflict with all available documentation. 
Conflicts: zfs-fuse -%if 0%{?rhel}%{?fedora}%{?suse_version} +%if 0%{?rhel}%{?centos}%{?fedora}%{?suse_version}%{?openEuler} BuildRequires: gcc, make BuildRequires: zlib-devel BuildRequires: libuuid-devel @@ -140,11 +109,11 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel -%if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?centos} >= 8 +%if 0%{?fedora}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8 BuildRequires: libtirpc-devel %endif -%if (0%{?fedora}%{?suse_version}) || (0%{?rhel} && 0%{?rhel} < 9) +%if (0%{?fedora}%{?suse_version}%{?openEuler}) || (0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 @@ -285,7 +254,7 @@ Requires: sudo Requires: sysstat Requires: libaio Requires: python%{__python_pkg_version} -%if 0%{?rhel}%{?fedora}%{?suse_version} +%if 0%{?rhel}%{?centos}%{?fedora}%{?suse_version}%{?openEuler} BuildRequires: libaio-devel %endif AutoReqProv: no @@ -308,6 +277,8 @@ This package contains a dracut module used to construct an initramfs image which is ZFS aware. 
%if %{with pyzfs} +# Enforce `python36-` package prefix for CentOS 7 +# since dependencies come from EPEL and are named this way %package -n python%{__python_pkg_version}-pyzfs Summary: Python %{python_version} wrapper for libzfs_core Group: Development/Languages/Python @@ -317,16 +288,26 @@ Requires: libzfs5 = %{version}-%{release} Requires: libnvpair3 = %{version}-%{release} Requires: libffi Requires: python%{__python_pkg_version} -Requires: %{__python_cffi_pkg} -%if 0%{?rhel}%{?fedora}%{?suse_version} -%if 0%{?rhel} >= 8 || 0%{?centos} >= 8 || 0%{?fedora} >= 28 -BuildRequires: python3-packaging + +%if 0%{?centos} == 7 +Requires: python36-cffi %else -BuildRequires: python-packaging +Requires: python%{__python_pkg_version}-cffi %endif + +%if 0%{?rhel}%{?centos}%{?fedora}%{?suse_version}%{?openEuler} +%if 0%{?centos} == 7 +BuildRequires: python36-packaging +BuildRequires: python36-devel +BuildRequires: python36-cffi +BuildRequires: python36-setuptools +%else +BuildRequires: python%{__python_pkg_version}-packaging BuildRequires: python%{__python_pkg_version}-devel -BuildRequires: %{__python_cffi_pkg} -BuildRequires: %{__python_setuptools_pkg} +BuildRequires: python%{__python_pkg_version}-cffi +BuildRequires: python%{__python_pkg_version}-setuptools +%endif + BuildRequires: libffi-devel %endif @@ -485,7 +466,7 @@ systemctl --system daemon-reload >/dev/null || true %{_bindir}/raidz_test %{_sbindir}/zgenhostid %{_bindir}/zvol_wait -# Optional Python 2/3 scripts +# Optional Python 3 scripts %{_bindir}/arc_summary %{_bindir}/arcstat %{_bindir}/dbufstat diff --git a/sys/contrib/openzfs/scripts/Makefile.am b/sys/contrib/openzfs/scripts/Makefile.am index 6c59fd7d4faf..047ae7eaca6d 100644 --- a/sys/contrib/openzfs/scripts/Makefile.am +++ b/sys/contrib/openzfs/scripts/Makefile.am @@ -27,7 +27,7 @@ EXTRA_DIST = \ zol2zfs-patch.sed \ $(EXTRA_SCRIPTS) -SHELLCHECK_IGNORE = ,SC1117 +SHELLCHECK_IGNORE = ,SC1117,SC2086,SC2295 SHELLCHECKSCRIPTS = $(EXTRA_SCRIPTS) define 
EXTRA_ENVIRONMENT diff --git a/sys/contrib/openzfs/scripts/zfs-tests.sh b/sys/contrib/openzfs/scripts/zfs-tests.sh index aa0829b28326..1e0cf66d1cdc 100755 --- a/sys/contrib/openzfs/scripts/zfs-tests.sh +++ b/sys/contrib/openzfs/scripts/zfs-tests.sh @@ -38,6 +38,7 @@ VERBOSE="no" QUIET="" CLEANUP="yes" CLEANUPALL="no" +KMSG="" LOOPBACK="yes" STACK_TRACER="no" FILESIZE="4G" @@ -53,6 +54,7 @@ ZFS_DBGMSG="$STF_SUITE/callbacks/zfs_dbgmsg.ksh" ZFS_DMESG="$STF_SUITE/callbacks/zfs_dmesg.ksh" UNAME=$(uname -s) RERUN="" +KMEMLEAK="" # Override some defaults if on FreeBSD if [ "$UNAME" = "FreeBSD" ] ; then @@ -324,10 +326,12 @@ OPTIONS: -q Quiet test-runner output -x Remove all testpools, dm, lo, and files (unsafe) -k Disable cleanup after test failure + -K Log test names to /dev/kmsg -f Use files only, disables block device tests -S Enable stack tracer (negative performance impact) -c Only create and populate constrained path -R Automatically rerun failing tests + -m Enable kmemleak reporting (Linux only) -n NFSFILE Use the nfsfile to determine the NFS configuration -I NUM Number of iterations -d DIR Use DIR for files and loopback devices @@ -354,7 +358,7 @@ $0 -x EOF } -while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do +while getopts 'hvqxkKfScRmn:d:s:r:?t:T:u:I:' OPTION; do case $OPTION in h) usage @@ -372,6 +376,9 @@ while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do k) CLEANUP="no" ;; + K) + KMSG="yes" + ;; f) LOOPBACK="no" ;; @@ -385,6 +392,9 @@ while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do R) RERUN="yes" ;; + m) + KMEMLEAK="yes" + ;; n) nfsfile=$OPTARG [ -f "$nfsfile" ] || fail "Cannot read file: $nfsfile" @@ -694,12 +704,16 @@ REPORT_FILE=$(mktemp_file zts-report) # # Run all the tests as specified. 
# -msg "${TEST_RUNNER} ${QUIET:+-q}" \ +msg "${TEST_RUNNER}" \ + "${QUIET:+-q}" \ + "${KMEMLEAK:+-m}" \ + "${KMSG:+-K}" \ "-c \"${RUNFILES}\"" \ "-T \"${TAGS}\"" \ "-i \"${STF_SUITE}\"" \ "-I \"${ITERATIONS}\"" -${TEST_RUNNER} ${QUIET:+-q} \ +${TEST_RUNNER} ${QUIET:+-q} ${KMEMLEAK:+-m} \ + ${KMSG:+-K} \ -c "${RUNFILES}" \ -T "${TAGS}" \ -i "${STF_SUITE}" \ @@ -719,7 +733,7 @@ if [ "$RESULT" -eq "2" ] && [ -n "$RERUN" ]; then for test_name in $MAYBES; do grep "$test_name " "$TEMP_RESULTS_FILE" >>"$TEST_LIST" done - ${TEST_RUNNER} ${QUIET:+-q} \ + ${TEST_RUNNER} ${QUIET:+-q} ${KMEMLEAK:+-m} \ -c "${RUNFILES}" \ -T "${TAGS}" \ -i "${STF_SUITE}" \ diff --git a/sys/contrib/openzfs/tests/Makefile.am b/sys/contrib/openzfs/tests/Makefile.am index 1dfc2cc5f518..d8277ef2dff7 100644 --- a/sys/contrib/openzfs/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/Makefile.am @@ -4,5 +4,6 @@ SUBDIRS = runfiles test-runner zfs-tests EXTRA_DIST = README.md +SHELLCHECK_IGNORE = ,SC2155 SHELLCHECKSCRIPTS = $$(find . -name '*.sh') .PHONY: $(SHELLCHECKSCRIPTS) diff --git a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in index d32e05c45392..a652d3d4a0ff 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in @@ -15,19 +15,14 @@ # Copyright (c) 2012, 2018 by Delphix. All rights reserved. # Copyright (c) 2019 Datto Inc. # -# This script must remain compatible with Python 2.6+ and Python 3.4+. +# This script must remain compatible with Python 3.6+. 
# -# some python 2.7 system don't have a configparser shim -try: - import configparser -except ImportError: - import ConfigParser as configparser - import os import sys import ctypes import re +import configparser from datetime import datetime from optparse import OptionParser @@ -36,11 +31,14 @@ from pwd import getpwuid from select import select from subprocess import PIPE from subprocess import Popen +from subprocess import check_output from threading import Timer -from time import time +from time import time, CLOCK_MONOTONIC_RAW +from os.path import exists BASEDIR = '/var/tmp/test_results' TESTDIR = '/usr/share/zfs/' +KMEMLEAK_FILE = '/sys/kernel/debug/kmemleak' KILL = 'kill' TRUE = 'true' SUDO = 'sudo' @@ -49,9 +47,6 @@ LOG_OUT = 'LOG_OUT' LOG_ERR = 'LOG_ERR' LOG_FILE_OBJ = None -# some python 2.7 system don't have a concept of monotonic time -CLOCK_MONOTONIC_RAW = 4 # see <linux/time.h> - class timespec(ctypes.Structure): _fields_ = [ @@ -83,6 +78,7 @@ class Result(object): self.runtime = '' self.stdout = [] self.stderr = [] + self.kmemleak = '' self.result = '' def done(self, proc, killed, reran): @@ -98,6 +94,9 @@ class Result(object): if killed: self.result = 'KILLED' Result.runresults['KILLED'] += 1 + elif len(self.kmemleak) > 0: + self.result = 'FAIL' + Result.runresults['FAIL'] += 1 elif self.returncode == 0: self.result = 'PASS' Result.runresults['PASS'] += 1 @@ -258,7 +257,7 @@ User: %s return out.lines, err.lines - def run(self, dryrun): + def run(self, dryrun, kmemleak, kmsg): """ This is the main function that runs each individual test. Determine whether or not the command requires sudo, and modify it @@ -277,7 +276,24 @@ User: %s except OSError as e: fail('%s' % e) + """ + Log each test we run to /dev/kmsg (on Linux), so if there's a kernel + warning we'll be able to match it up to a particular test. 
+ """ + if kmsg is True and exists("/dev/kmsg"): + try: + kp = Popen([SUDO, "sh", "-c", + f"echo ZTS run {self.pathname} > /dev/kmsg"]) + kp.wait() + except Exception: + pass + self.result.starttime = monotonic_time() + + if kmemleak: + cmd = f'echo clear | {SUDO} tee {KMEMLEAK_FILE}' + check_output(cmd, shell=True) + proc = Popen(privcmd, stdout=PIPE, stderr=PIPE) # Allow a special timeout value of 0 to mean infinity if int(self.timeout) == 0: @@ -287,6 +303,12 @@ User: %s try: t.start() self.result.stdout, self.result.stderr = self.collect_output(proc) + + if kmemleak: + cmd = f'echo scan | {SUDO} tee {KMEMLEAK_FILE}' + check_output(cmd, shell=True) + cmd = f'{SUDO} cat {KMEMLEAK_FILE}' + self.result.kmemleak = check_output(cmd, shell=True) except KeyboardInterrupt: self.kill_cmd(proc, True) fail('\nRun terminated at user request.') @@ -363,6 +385,9 @@ User: %s with open(os.path.join(self.outputdir, 'merged'), 'wb') as merged: for _, line in lines: os.write(merged.fileno(), b'%s\n' % line) + if len(self.result.kmemleak): + with open(os.path.join(self.outputdir, 'kmemleak'), 'wb') as kmem: + kmem.write(self.result.kmemleak) class Test(Cmd): @@ -447,14 +472,14 @@ Tags: %s cont = True if len(pretest.pathname): - pretest.run(options.dryrun) + pretest.run(options.dryrun, False, options.kmsg) cont = pretest.result.result == 'PASS' pretest.log(options) if cont: - test.run(options.dryrun) + test.run(options.dryrun, options.kmemleak, options.kmsg) if test.result.result == 'KILLED' and len(failsafe.pathname): - failsafe.run(options.dryrun) + failsafe.run(options.dryrun, False, options.kmsg) failsafe.log(options, suppress_console=True) else: test.skip() @@ -462,7 +487,7 @@ Tags: %s test.log(options) if len(posttest.pathname): - posttest.run(options.dryrun) + posttest.run(options.dryrun, False, options.kmsg) posttest.log(options) @@ -565,7 +590,7 @@ Tags: %s cont = True if len(pretest.pathname): - pretest.run(options.dryrun) + pretest.run(options.dryrun, False, options.kmsg) 
cont = pretest.result.result == 'PASS' pretest.log(options) @@ -578,9 +603,9 @@ Tags: %s failsafe = Cmd(self.failsafe, outputdir=odir, timeout=self.timeout, user=self.failsafe_user, identifier=self.identifier) if cont: - test.run(options.dryrun) + test.run(options.dryrun, options.kmemleak, options.kmsg) if test.result.result == 'KILLED' and len(failsafe.pathname): - failsafe.run(options.dryrun) + failsafe.run(options.dryrun, False, options.kmsg) failsafe.log(options, suppress_console=True) else: test.skip() @@ -588,7 +613,7 @@ Tags: %s test.log(options) if len(posttest.pathname): - posttest.run(options.dryrun) + posttest.run(options.dryrun, False, options.kmsg) posttest.log(options) @@ -853,6 +878,11 @@ class TestRun(object): else: write_log('Could not make a symlink to directory %s\n' % self.outputdir, LOG_ERR) + + if options.kmemleak: + cmd = f'echo scan=0 | {SUDO} tee {KMEMLEAK_FILE}' + check_output(cmd, shell=True) + iteration = 0 while iteration < options.iterations: for test in sorted(self.tests.keys()): @@ -998,6 +1028,14 @@ def fail(retstr, ret=1): exit(ret) +def kmemleak_cb(option, opt_str, value, parser): + if not os.path.exists(KMEMLEAK_FILE): + fail(f"File '{KMEMLEAK_FILE}' doesn't exist. 
" + + "Enable CONFIG_DEBUG_KMEMLEAK in kernel configuration.") + + setattr(parser.values, option.dest, True) + + def options_cb(option, opt_str, value, parser): path_options = ['outputdir', 'template', 'testdir', 'logfile'] @@ -1035,6 +1073,11 @@ def parse_args(): parser.add_option('-i', action='callback', callback=options_cb, default=TESTDIR, dest='testdir', type='string', metavar='testdir', help='Specify a test directory.') + parser.add_option('-K', action='store_true', default=False, dest='kmsg', + help='Log tests names to /dev/kmsg') + parser.add_option('-m', action='callback', callback=kmemleak_cb, + default=False, dest='kmemleak', + help='Enable kmemleak reporting (Linux only)') parser.add_option('-p', action='callback', callback=options_cb, default='', dest='pre', metavar='script', type='string', help='Specify a pre script.') diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index 71b0cc8d6483..432899c21f4d 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -15,7 +15,7 @@ # Copyright (c) 2017 by Delphix. All rights reserved. # Copyright (c) 2018 by Lawrence Livermore National Security, LLC. # -# This script must remain compatible with Python 2.6+ and Python 3.4+. +# This script must remain compatible with Python 3.6+. # import os @@ -62,13 +62,13 @@ known_reason = 'Known issue' exec_reason = 'Test user execute permissions required for utilities' # -# Some tests require a minimum python version of 3.5 and will be skipped when +# Some tests require a minimum python version of 3.6 and will be skipped when # the default system version is too old. There may also be tests which require -# additional python modules be installed, for example python-cffi is required +# additional python modules be installed, for example python3-cffi is required # by the pyzfs tests. 
# -python_reason = 'Python v3.5 or newer required' -python_deps_reason = 'Python modules missing: python-cffi' +python_reason = 'Python v3.6 or newer required' +python_deps_reason = 'Python modules missing: python3-cffi' # # Some tests require the O_TMPFILE flag which was first introduced in the diff --git a/sys/contrib/openzfs/tests/test-runner/man/test-runner.1 b/sys/contrib/openzfs/tests/test-runner/man/test-runner.1 index f7cbcbc5b9e9..b823aaa3e1a0 100644 --- a/sys/contrib/openzfs/tests/test-runner/man/test-runner.1 +++ b/sys/contrib/openzfs/tests/test-runner/man/test-runner.1 @@ -210,6 +210,8 @@ to be consumed by the run command. .It Fl d Dry run mode. Execute no tests, but print a description of each test that would have been run. +.It Fl m +Enable kmemleak reporting (Linux only) .It Fl g Create test groups from any directories found while searching for tests. .It Fl o Ar outputdir diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg index a565ea8d5174..78802c9fb942 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg @@ -70,8 +70,6 @@ export SYSTEM_FILES_COMMON='arp printf ps pwd - python - python2 python3 quotaon readlink diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib index 079272811f2f..89c6382dedc3 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib @@ -2258,7 +2258,7 @@ function check_slog_state # pool disk state{online,offline,unavail} # # Return 0 is pool/disk matches expected state, 1 otherwise # -function check_vdev_state # pool disk state{online,offline,unavail} +function check_vdev_state # pool disk state{online,offline,unavail,removed} { typeset pool=$1 typeset disk=${2#*$DEV_DSKDIR/} diff --git 
a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh index 3788543b0b2f..b0265c5ee4a1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh @@ -92,27 +92,13 @@ typeset -a pos_cmds_out=( } }") -# -# N.B. json.tool is needed to guarantee consistent ordering of fields, -# sed is needed to trim trailing space in CentOS 6's json.tool output -# -# As of Python 3.5 the behavior of json.tool changed to keep the order -# the same as the input and the --sort-keys option was added. Detect when -# --sort-keys is supported and apply the option to ensure the expected order. -# -if python -m json.tool --sort-keys <<< "{}"; then - JSON_TOOL_CMD="python -m json.tool --sort-keys" -else - JSON_TOOL_CMD="python -m json.tool" -fi - typeset -i cnt=0 typeset cmd for cmd in ${pos_cmds[@]}; do log_must zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 log_must zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 OUTPUT=$(zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 | - $JSON_TOOL_CMD | sed 's/[[:space:]]*$//') + python3 -m json.tool --sort-keys) if [ "$OUTPUT" != "${pos_cmds_out[$cnt]}" ]; then log_note "Got :$OUTPUT" log_note "Expected:${pos_cmds_out[$cnt]}" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh index 42628a0512e9..3023ea47eee2 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh @@ -119,33 +119,33 @@ full_size=$(zfs send $full_snapshot 2>&1 | wc -c) incremental_size=$(zfs send 
$incremental_snapshot 2>&1 | wc -c) incremental_send=$(zfs send -i $full_snapshot $incremental_snapshot 2>&1 | wc -c) -log_note "verify zfs send -nv" -options="-nv" +log_note "verify zfs send -nvV" +options="-nvV" refer_size=$(get_prop refer $full_snapshot) estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $full_size -log_note "verify zfs send -Pnv" -options="-Pnv" +log_note "verify zfs send -PnvV" +options="-PnvV" estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $full_size -log_note "verify zfs send -nv for multiple snapshot send" -options="-nv" +log_note "verify zfs send -nvV for multiple snapshot send" +options="-nvV" refer_size=$(get_prop refer $incremental_snapshot) estimate_size=$(get_estimate_size $incremental_snapshot $options) log_must verify_size_estimates $options $incremental_size -log_note "verify zfs send -vPn for multiple snapshot send" -options="-vPn" +log_note "verify zfs send -vVPn for multiple snapshot send" +options="-vVPn" estimate_size=$(get_estimate_size $incremental_snapshot $options) log_must verify_size_estimates $options $incremental_size -log_note "verify zfs send -inv for incremental send" -options="-nvi" +log_note "verify zfs send -invV for incremental send" +options="-nvVi" refer_size=$(get_prop refer $incremental_snapshot) deduct_size=$(get_prop refer $full_snapshot) refer_size=$(echo "$refer_size - $deduct_size" | bc) @@ -155,8 +155,8 @@ log_must verify_size_estimates $options $incremental_send estimate_size=$(get_estimate_size $incremental_snapshot $options $full_bookmark) log_must verify_size_estimates $options $incremental_send -log_note "verify zfs send -ivPn for incremental send" -options="-vPni" +log_note "verify zfs send -ivVPn for incremental send" +options="-vVPni" estimate_size=$(get_estimate_size $incremental_snapshot $options $full_snapshot) log_must verify_size_estimates $options $incremental_send @@ -186,16 +186,16 @@ for 
ds in $datasets; do datasetexists $ds@snap64 || log_fail "Create $ds@snap64 snapshot fail." done recursive_size=$(zfs send -R $full_snapshot 2>&1 | wc -c) -log_note "verify zfs send -Rnv for recursive send" -options="-Rnv" +log_note "verify zfs send -RnvV for recursive send" +options="-RnvV" refer_size=$(get_prop refer $full_snapshot) refer_size=$(echo "$refer_size * 3" | bc) estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $recursive_size -log_note "verify zfs send -RvPn for recursive send" -options="-RvPn" +log_note "verify zfs send -RvVPn for recursive send" +options="-RvVPn" estimate_size=$(get_estimate_size $full_snapshot $options) log_must verify_size_estimates $options $recursive_size diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh index d7189f298384..78eed0f4ce89 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh @@ -24,29 +24,28 @@ # # DESCRIPTION: -# Testing Fault Management Agent ZED Logic - Physically removed device is -# made unavail and onlined when reattached +# Testing Fault Management Agent ZED Logic - Physically detached device is +# made removed and onlined when reattached # # STRATEGY: # 1. Create a pool # 2. Simulate physical removal of one device -# 3. Verify the device is unavailable +# 3. Verify the device is removed when detached # 4. Reattach the device # 5. Verify the device is onlined # 6. Repeat the same tests with a spare device: # zed will use the spare to handle the removed data device # 7. 
Repeat the same tests again with a faulted spare device: -# the removed data device should be unavailable +# the removed data device should be removed # # NOTE: the use of 'block_device_wait' throughout the test helps avoid race # conditions caused by mixing creation/removal events from partitioning the # disk (zpool create) and events from physically removing it (remove_disk). # -# NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a -# vdev to the unavailable state. The ZED does receive a removal notification -# but only relies on it to activate a hot spare. Additional work is planned -# to extend an existing ioctl interface to allow the ZED to transition the -# vdev in to a removed state. +# NOTE: the test relies on ZED to transit state to removed on device removed +# event. The ZED does receive a removal notification but only relies on it to +# activate a hot spare. Additional work is planned to extend an existing ioctl +# interface to allow the ZED to transition the vdev in to a removed state. # verify_runnable "both" @@ -104,8 +103,8 @@ do log_must mkfile 1m $mntpnt/file log_must zpool sync $TESTPOOL - # 3. Verify the device is unavailable. - log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL" + # 3. Verify the device is removed. + log_must wait_vdev_state $TESTPOOL $removedev "REMOVED" # 4. Reattach the device insert_disk $removedev @@ -143,7 +142,7 @@ do # 3. Verify the device is handled by the spare. log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE" - log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL" + log_must wait_vdev_state $TESTPOOL $removedev "REMOVED" # 4. Reattach the device insert_disk $removedev @@ -178,8 +177,8 @@ do log_must mkfile 1m $mntpnt/file log_must zpool sync $TESTPOOL - # 4. Verify the device is unavailable - log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL" + # 4. Verify the device is removed + log_must wait_vdev_state $TESTPOOL $removedev "REMOVED" # 5. 
Reattach the device insert_disk $removedev diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in index 4ca610e5f1e9..1f58d8116b68 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in @@ -30,7 +30,7 @@ verify_runnable "global" # Verify that the required dependencies for testing are installed. @PYTHON@ -c "import cffi" 2>/dev/null if [ $? -eq 1 ]; then - log_unsupported "python-cffi not found by Python" + log_unsupported "python3-cffi not found by Python" fi # We don't just try to "import libzfs_core" because we want to skip these tests diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh index 370f5382ebae..661fbe85db82 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh @@ -87,7 +87,7 @@ log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir # OpenZFS issue #7432 log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2 log_must touch /$TESTPOOL/$TESTFS2/attrs -log_must eval "python -c 'print \"a\" * 4096' | \ +log_must eval "python3 -c 'print \"a\" * 4096' | \ set_xattr_stdin bigval /$TESTPOOL/$TESTFS2/attrs" log_must zfs set compression=off xattr=on $TESTPOOL/$TESTFS2 diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh index 551ed15db254..bd30488eaab0 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh +++ 
b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh @@ -88,7 +88,7 @@ log_must zfs snapshot $POOL/fs@c # 4. Create an empty file and add xattrs to it to exercise reclaiming a # dnode that requires more than 1 slot for its bonus buffer (Zol #7433) log_must zfs set compression=on xattr=sa $POOL/fs -log_must eval "python -c 'print \"a\" * 512' | +log_must eval "python3 -c 'print \"a\" * 512' | set_xattr_stdin bigval /$POOL/fs/attrs" log_must zfs snapshot $POOL/fs@d diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 90c8cd12b042..36a8a00a1d44 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -92,7 +92,13 @@ /* #undef HAVE_BDEV_CHECK_MEDIA_CHANGE */ /* bdev_*_io_acct() available */ -/* #undef HAVE_BDEV_IO_ACCT */ +/* #undef HAVE_BDEV_IO_ACCT_63 */ + +/* bdev_*_io_acct() available */ +/* #undef HAVE_BDEV_IO_ACCT_OLD */ + +/* bdev_kobj() exists */ +/* #undef HAVE_BDEV_KOBJ */ /* bdev_max_discard_sectors() is available */ /* #undef HAVE_BDEV_MAX_DISCARD_SECTORS */ @@ -533,6 +539,9 @@ /* folio_wait_bit() exists */ /* #undef HAVE_PAGEMAP_FOLIO_WAIT_BIT */ +/* part_to_dev() exists */ +/* #undef HAVE_PART_TO_DEV */ + /* iops->getattr() takes a path */ /* #undef HAVE_PATH_IOPS_GETATTR */ @@ -944,7 +953,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.1.9-FreeBSD_g92e0d9d18" +#define ZFS_META_ALIAS "zfs-2.1.11-FreeBSD_ge25f9131d" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -953,7 +962,7 @@ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ -#define ZFS_META_KVER_MAX "6.1" +#define ZFS_META_KVER_MAX "6.2" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "3.10" @@ -974,10 +983,10 @@ #define ZFS_META_NAME "zfs" /* Define the project release. 
*/ -#define ZFS_META_RELEASE "FreeBSD_g92e0d9d18" +#define ZFS_META_RELEASE "FreeBSD_ge25f9131d" /* Define the project version. */ -#define ZFS_META_VERSION "2.1.9" +#define ZFS_META_VERSION "2.1.11" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 92f5397be674..b04e2494c739 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.1.9-0-g92e0d9d18" +#define ZFS_META_GITREV "zfs-2.1.11-0-ge25f9131d" |